1 #include "license.hunspell"
2 #include "license.myspell"
3
4 #include <stdlib.h>
5 #include <string.h>
6 #include <stdio.h>
7
8 #include "hunspell.hxx"
9 #include "hunspell.h"
10 #ifndef MOZILLA_CLIENT
11 # include "config.h"
12 #endif
13 #include "csutil.hxx"
14
15 #include <string>
16
Hunspell(const char * affpath,const char * dpath,const char * key)17 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
18 {
19 encoding = NULL;
20 csconv = NULL;
21 utf8 = 0;
22 complexprefixes = 0;
23 affixpath = mystrdup(affpath);
24 maxdic = 0;
25
26 /* first set up the hash manager */
27 pHMgr[0] = new HashMgr(dpath, affpath, key);
28 if (pHMgr[0]) maxdic = 1;
29
30 /* next set up the affix manager */
31 /* it needs access to the hash manager lookup methods */
32 pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
33
34 /* get the preferred try string and the dictionary */
35 /* encoding from the Affix Manager for that dictionary */
36 char * try_string = pAMgr->get_try_string();
37 encoding = pAMgr->get_encoding();
38 langnum = pAMgr->get_langnum();
39 utf8 = pAMgr->get_utf8();
40 if (!utf8)
41 csconv = get_current_cs(encoding);
42 complexprefixes = pAMgr->get_complexprefixes();
43 wordbreak = pAMgr->get_breaktable();
44
45 /* and finally set up the suggestion manager */
46 pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
47 if (try_string) free(try_string);
48 }
49
~Hunspell()50 Hunspell::~Hunspell()
51 {
52 if (pSMgr) delete pSMgr;
53 if (pAMgr) delete pAMgr;
54 for (int i = 0; i < maxdic; i++) delete pHMgr[i];
55 maxdic = 0;
56 pSMgr = NULL;
57 pAMgr = NULL;
58 #ifdef MOZILLA_CLIENT
59 delete [] csconv;
60 #endif
61 csconv= NULL;
62 if (encoding) free(encoding);
63 encoding = NULL;
64 if (affixpath) free(affixpath);
65 affixpath = NULL;
66 }
67
68 // load extra dictionaries
add_dic(const char * dpath,const char * key)69 int Hunspell::add_dic(const char * dpath, const char * key) {
70 if (maxdic == MAXDIC || !affixpath) return 1;
71 pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
72 if (pHMgr[maxdic]) maxdic++; else return 1;
73 return 0;
74 }
75
76 // make a copy of src at destination while removing all leading
77 // blanks and removing any trailing periods after recording
78 // their presence with the abbreviation flag
79 // also since already going through character by character,
80 // set the capitalization type
81 // return the length of the "cleaned" (and UTF-8 encoded) word
82
cleanword2(char * dest,const char * src,w_char * dest_utf,int * nc,int * pcaptype,int * pabbrev)83 int Hunspell::cleanword2(char * dest, const char * src,
84 w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
85 {
86 unsigned char * p = (unsigned char *) dest;
87 const unsigned char * q = (const unsigned char * ) src;
88
89 // first skip over any leading blanks
90 while ((*q != '\0') && (*q == ' ')) q++;
91
92 // now strip off any trailing periods (recording their presence)
93 *pabbrev = 0;
94 int nl = strlen((const char *)q);
95 while ((nl > 0) && (*(q+nl-1)=='.')) {
96 nl--;
97 (*pabbrev)++;
98 }
99
100 // if no characters are left it can't be capitalized
101 if (nl <= 0) {
102 *pcaptype = NOCAP;
103 *p = '\0';
104 return 0;
105 }
106
107 strncpy(dest, (char *) q, nl);
108 *(dest + nl) = '\0';
109 nl = strlen(dest);
110 if (utf8) {
111 *nc = u8_u16(dest_utf, MAXWORDLEN, dest);
112 // don't check too long words
113 if (*nc >= MAXWORDLEN) return 0;
114 if (*nc == -1) { // big Unicode character (non BMP area)
115 *pcaptype = NOCAP;
116 return nl;
117 }
118 *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);
119 } else {
120 *pcaptype = get_captype(dest, nl, csconv);
121 *nc = nl;
122 }
123 return nl;
124 }
125
cleanword(char * dest,const char * src,int * pcaptype,int * pabbrev)126 int Hunspell::cleanword(char * dest, const char * src,
127 int * pcaptype, int * pabbrev)
128 {
129 unsigned char * p = (unsigned char *) dest;
130 const unsigned char * q = (const unsigned char * ) src;
131 int firstcap = 0;
132
133 // first skip over any leading blanks
134 while ((*q != '\0') && (*q == ' ')) q++;
135
136 // now strip off any trailing periods (recording their presence)
137 *pabbrev = 0;
138 int nl = strlen((const char *)q);
139 while ((nl > 0) && (*(q+nl-1)=='.')) {
140 nl--;
141 (*pabbrev)++;
142 }
143
144 // if no characters are left it can't be capitalized
145 if (nl <= 0) {
146 *pcaptype = NOCAP;
147 *p = '\0';
148 return 0;
149 }
150
151 // now determine the capitalization type of the first nl letters
152 int ncap = 0;
153 int nneutral = 0;
154 int nc = 0;
155
156 if (!utf8) {
157 while (nl > 0) {
158 nc++;
159 if (csconv[(*q)].ccase) ncap++;
160 if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
161 *p++ = *q++;
162 nl--;
163 }
164 // remember to terminate the destination string
165 *p = '\0';
166 firstcap = csconv[(unsigned char)(*dest)].ccase;
167 } else {
168 unsigned short idx;
169 w_char t[MAXWORDLEN];
170 nc = u8_u16(t, MAXWORDLEN, src);
171 for (int i = 0; i < nc; i++) {
172 idx = (t[i].h << 8) + t[i].l;
173 unsigned short low = unicodetolower(idx, langnum);
174 if (idx != low) ncap++;
175 if (unicodetoupper(idx, langnum) == low) nneutral++;
176 }
177 u16_u8(dest, MAXWORDUTF8LEN, t, nc);
178 if (ncap) {
179 idx = (t[0].h << 8) + t[0].l;
180 firstcap = (idx != unicodetolower(idx, langnum));
181 }
182 }
183
184 // now finally set the captype
185 if (ncap == 0) {
186 *pcaptype = NOCAP;
187 } else if ((ncap == 1) && firstcap) {
188 *pcaptype = INITCAP;
189 } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
190 *pcaptype = ALLCAP;
191 } else if ((ncap > 1) && firstcap) {
192 *pcaptype = HUHINITCAP;
193 } else {
194 *pcaptype = HUHCAP;
195 }
196 return strlen(dest);
197 }
198
mkallcap(char * p)199 void Hunspell::mkallcap(char * p)
200 {
201 if (utf8) {
202 w_char u[MAXWORDLEN];
203 int nc = u8_u16(u, MAXWORDLEN, p);
204 unsigned short idx;
205 for (int i = 0; i < nc; i++) {
206 idx = (u[i].h << 8) + u[i].l;
207 if (idx != unicodetoupper(idx, langnum)) {
208 u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
209 u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
210 }
211 }
212 u16_u8(p, MAXWORDUTF8LEN, u, nc);
213 } else {
214 while (*p != '\0') {
215 *p = csconv[((unsigned char) *p)].cupper;
216 p++;
217 }
218 }
219 }
220
mkallcap2(char * p,w_char * u,int nc)221 int Hunspell::mkallcap2(char * p, w_char * u, int nc)
222 {
223 if (utf8) {
224 unsigned short idx;
225 for (int i = 0; i < nc; i++) {
226 idx = (u[i].h << 8) + u[i].l;
227 unsigned short up = unicodetoupper(idx, langnum);
228 if (idx != up) {
229 u[i].h = (unsigned char) (up >> 8);
230 u[i].l = (unsigned char) (up & 0x00FF);
231 }
232 }
233 u16_u8(p, MAXWORDUTF8LEN, u, nc);
234 return strlen(p);
235 } else {
236 while (*p != '\0') {
237 *p = csconv[((unsigned char) *p)].cupper;
238 p++;
239 }
240 }
241 return nc;
242 }
243
244
mkallsmall(char * p)245 void Hunspell::mkallsmall(char * p)
246 {
247 while (*p != '\0') {
248 *p = csconv[((unsigned char) *p)].clower;
249 p++;
250 }
251 }
252
mkallsmall2(char * p,w_char * u,int nc)253 int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
254 {
255 if (utf8) {
256 unsigned short idx;
257 for (int i = 0; i < nc; i++) {
258 idx = (u[i].h << 8) + u[i].l;
259 unsigned short low = unicodetolower(idx, langnum);
260 if (idx != low) {
261 u[i].h = (unsigned char) (low >> 8);
262 u[i].l = (unsigned char) (low & 0x00FF);
263 }
264 }
265 u16_u8(p, MAXWORDUTF8LEN, u, nc);
266 return strlen(p);
267 } else {
268 while (*p != '\0') {
269 *p = csconv[((unsigned char) *p)].clower;
270 p++;
271 }
272 }
273 return nc;
274 }
275
276 // convert UTF-8 sharp S codes to latin 1
sharps_u8_l1(char * dest,char * source)277 char * Hunspell::sharps_u8_l1(char * dest, char * source) {
278 char * p = dest;
279 *p = *source;
280 for (p++, source++; *(source - 1); p++, source++) {
281 *p = *source;
282 if (*source == '\x9F') *--p = '\xDF';
283 }
284 return dest;
285 }
286
287 // recursive search for right ss - sharp s permutations
spellsharps(char * base,char * pos,int n,int repnum,char * tmp,int * info,char ** root)288 hentry * Hunspell::spellsharps(char * base, char * pos, int n,
289 int repnum, char * tmp, int * info, char **root) {
290 pos = strstr(pos, "ss");
291 if (pos && (n < MAXSHARPS)) {
292 *pos = '\xC3';
293 *(pos + 1) = '\x9F';
294 hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
295 if (h) return h;
296 *pos = 's';
297 *(pos + 1) = 's';
298 h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
299 if (h) return h;
300 } else if (repnum > 0) {
301 if (utf8) return checkword(base, info, root);
302 return checkword(sharps_u8_l1(tmp, base), info, root);
303 }
304 return NULL;
305 }
306
is_keepcase(const hentry * rv)307 int Hunspell::is_keepcase(const hentry * rv) {
308 return pAMgr && rv->astr && pAMgr->get_keepcase() &&
309 TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
310 }
311
312 /* insert a word to the beginning of the suggestion array and return ns */
insert_sug(char *** slst,char * word,int ns)313 int Hunspell::insert_sug(char ***slst, char * word, int ns) {
314 char * dup = mystrdup(word);
315 if (!dup) return ns;
316 if (ns == MAXSUGGESTION) {
317 ns--;
318 free((*slst)[ns]);
319 }
320 for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
321 (*slst)[0] = dup;
322 return ns + 1;
323 }
324
spell(const char * word,int * info,char ** root)325 int Hunspell::spell(const char * word, int * info, char ** root)
326 {
327 struct hentry * rv=NULL;
328 // need larger vector. For example, Turkish capital letter I converted a
329 // 2-byte UTF-8 character (dotless i) by mkallsmall.
330 char cw[MAXWORDUTF8LEN];
331 char wspace[MAXWORDUTF8LEN];
332 w_char unicw[MAXWORDLEN];
333
334 int info2 = 0;
335 if (!info) info = &info2; else *info = 0;
336
337 // Hunspell supports XML input of the simplified API (see manual)
338 if (strcmp(word, SPELL_XML) == 0) return 1;
339 int nc = strlen(word);
340 int wl2 = 0;
341 if (utf8) {
342 if (nc >= MAXWORDUTF8LEN) return 0;
343 } else {
344 if (nc >= MAXWORDLEN) return 0;
345 }
346 int captype = 0;
347 int abbv = 0;
348 int wl = 0;
349
350 // input conversion
351 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
352 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
353 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
354
355 if (wl == 0 || maxdic == 0) return 1;
356 if (root) *root = NULL;
357
358 // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
359 enum { NBEGIN, NNUM, NSEP };
360 int nstate = NBEGIN;
361 int i;
362
363 for (i = 0; (i < wl); i++) {
364 if ((cw[i] <= '9') && (cw[i] >= '0')) {
365 nstate = NNUM;
366 } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
367 if ((nstate == NSEP) || (i == 0)) break;
368 nstate = NSEP;
369 } else break;
370 }
371 if ((i == wl) && (nstate == NNUM)) return 1;
372
373 switch(captype) {
374 case HUHCAP:
375 /* FALLTHROUGH */
376 case HUHINITCAP:
377 *info += SPELL_ORIGCAP;
378 /* FALLTHROUGH */
379 case NOCAP:
380 rv = checkword(cw, info, root);
381 if ((abbv) && !(rv)) {
382 memcpy(wspace,cw,wl);
383 *(wspace+wl) = '.';
384 *(wspace+wl+1) = '\0';
385 rv = checkword(wspace, info, root);
386 }
387 break;
388 case ALLCAP: {
389 *info += SPELL_ORIGCAP;
390 rv = checkword(cw, info, root);
391 if (rv) break;
392 if (abbv) {
393 memcpy(wspace,cw,wl);
394 *(wspace+wl) = '.';
395 *(wspace+wl+1) = '\0';
396 rv = checkword(wspace, info, root);
397 if (rv) break;
398 }
399 // Spec. prefix handling for Catalan, French, Italian:
400 // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
401 if (pAMgr && strchr(cw, '\'')) {
402 wl = mkallsmall2(cw, unicw, nc);
403 //There are no really sane circumstances where this could fail,
404 //but anyway...
405 if (char * apostrophe = strchr(cw, '\'')) {
406 if (utf8) {
407 w_char tmpword[MAXWORDLEN];
408 *apostrophe = '\0';
409 wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
410 *apostrophe = '\'';
411 if (wl2 >= 0 && wl2 < nc) {
412 mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
413 rv = checkword(cw, info, root);
414 if (rv) break;
415 }
416 } else {
417 mkinitcap2(apostrophe + 1, unicw, nc);
418 rv = checkword(cw, info, root);
419 if (rv) break;
420 }
421 }
422 mkinitcap2(cw, unicw, nc);
423 rv = checkword(cw, info, root);
424 if (rv) break;
425 }
426 if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
427 char tmpword[MAXWORDUTF8LEN];
428 wl = mkallsmall2(cw, unicw, nc);
429 memcpy(wspace,cw,(wl+1));
430 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
431 if (!rv) {
432 wl2 = mkinitcap2(cw, unicw, nc);
433 rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
434 }
435 if ((abbv) && !(rv)) {
436 *(wspace+wl) = '.';
437 *(wspace+wl+1) = '\0';
438 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
439 if (!rv) {
440 memcpy(wspace, cw, wl2);
441 *(wspace+wl2) = '.';
442 *(wspace+wl2+1) = '\0';
443 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
444 }
445 }
446 if (rv) break;
447 }
448 }
449 case INITCAP: {
450 *info += SPELL_ORIGCAP;
451 wl = mkallsmall2(cw, unicw, nc);
452 memcpy(wspace,cw,(wl+1));
453 wl2 = mkinitcap2(cw, unicw, nc);
454 if (captype == INITCAP) *info += SPELL_INITCAP;
455 rv = checkword(cw, info, root);
456 if (captype == INITCAP) *info -= SPELL_INITCAP;
457 // forbid bad capitalization
458 // (for example, ijs -> Ijs instead of IJs in Dutch)
459 // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
460 if (*info & SPELL_FORBIDDEN) {
461 rv = NULL;
462 break;
463 }
464 if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
465 if (rv) break;
466
467 rv = checkword(wspace, info, root);
468 if (abbv && !rv) {
469
470 *(wspace+wl) = '.';
471 *(wspace+wl+1) = '\0';
472 rv = checkword(wspace, info, root);
473 if (!rv) {
474 memcpy(wspace, cw, wl2);
475 *(wspace+wl2) = '.';
476 *(wspace+wl2+1) = '\0';
477 if (captype == INITCAP) *info += SPELL_INITCAP;
478 rv = checkword(wspace, info, root);
479 if (captype == INITCAP) *info -= SPELL_INITCAP;
480 if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
481 break;
482 }
483 }
484 if (rv && is_keepcase(rv) &&
485 ((captype == ALLCAP) ||
486 // if CHECKSHARPS: KEEPCASE words with \xDF are allowed
487 // in INITCAP form, too.
488 !(pAMgr->get_checksharps() &&
489 ((utf8 && strstr(wspace, "\xC3\x9F")) ||
490 (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;
491 break;
492 }
493 }
494
495 if (rv) {
496 if (pAMgr && pAMgr->get_warn() && rv->astr &&
497 TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
498 *info += SPELL_WARN;
499 if (pAMgr->get_forbidwarn()) return 0;
500 return HUNSPELL_OK_WARN;
501 }
502 return HUNSPELL_OK;
503 }
504
505 // recursive breaking at break points
506 if (wordbreak) {
507 char * s;
508 char r;
509 int nbr = 0;
510 wl = strlen(cw);
511 int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
512
513 // calculate break points for recursion limit
514 for (int j = 0; j < numbreak; j++) {
515 s = cw;
516 do {
517 s = (char *) strstr(s, wordbreak[j]);
518 if (s) {
519 nbr++;
520 s++;
521 }
522 } while (s);
523 }
524 if (nbr >= 10) return 0;
525
526 // check boundary patterns (^begin and end$)
527 for (int j = 0; j < numbreak; j++) {
528 int plen = strlen(wordbreak[j]);
529 if (plen == 1 || plen > wl) continue;
530 if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0
531 && spell(cw + plen - 1)) return 1;
532 if (wordbreak[j][plen - 1] == '$' &&
533 strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
534 r = cw[wl - plen + 1];
535 cw[wl - plen + 1] = '\0';
536 if (spell(cw)) return 1;
537 cw[wl - plen + 1] = r;
538 }
539 }
540
541 // other patterns
542 for (int j = 0; j < numbreak; j++) {
543 int plen = strlen(wordbreak[j]);
544 s=(char *) strstr(cw, wordbreak[j]);
545 if (s && (s > cw) && (s < cw + wl - plen)) {
546 if (!spell(s + plen)) continue;
547 r = *s;
548 *s = '\0';
549 // examine 2 sides of the break point
550 if (spell(cw)) return 1;
551 *s = r;
552
553 // LANG_hu: spec. dash rule
554 if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
555 r = s[1];
556 s[1] = '\0';
557 if (spell(cw)) return 1; // check the first part with dash
558 s[1] = r;
559 }
560 // end of LANG speficic region
561
562 }
563 }
564 }
565
566 return 0;
567 }
568
checkword(const char * w,int * info,char ** root)569 struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
570 {
571 struct hentry * he = NULL;
572 int len, i;
573 char w2[MAXWORDUTF8LEN];
574 const char * word;
575
576 char * ignoredchars = pAMgr->get_ignore();
577 if (ignoredchars != NULL) {
578 strcpy(w2, w);
579 if (utf8) {
580 int ignoredchars_utf16_len;
581 unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
582 remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
583 } else {
584 remove_ignored_chars(w2,ignoredchars);
585 }
586 word = w2;
587 } else word = w;
588
589 len = strlen(word);
590
591 if (!len)
592 return NULL;
593
594 // word reversing wrapper for complex prefixes
595 if (complexprefixes) {
596 if (word != w2) {
597 strcpy(w2, word);
598 word = w2;
599 }
600 if (utf8) reverseword_utf(w2); else reverseword(w2);
601 }
602
603 // look word in hash table
604 for (i = 0; (i < maxdic) && !he; i ++) {
605 he = (pHMgr[i])->lookup(word);
606
607 // check forbidden and onlyincompound words
608 if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
609 if (info) *info += SPELL_FORBIDDEN;
610 // LANG_hu section: set dash information for suggestions
611 if (langnum == LANG_hu) {
612 if (pAMgr->get_compoundflag() &&
613 TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
614 if (info) *info += SPELL_COMPOUND;
615 }
616 }
617 return NULL;
618 }
619
620 // he = next not needaffix, onlyincompound homonym or onlyupcase word
621 while (he && (he->astr) &&
622 ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
623 (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
624 (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
625 )) he = he->next_homonym;
626 }
627
628 // check with affixes
629 if (!he && pAMgr) {
630 // try stripping off affixes */
631 he = pAMgr->affix_check(word, len, 0);
632
633 // check compound restriction and onlyupcase
634 if (he && he->astr && (
635 (pAMgr->get_onlyincompound() &&
636 TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
637 (info && (*info & SPELL_INITCAP) &&
638 TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
639 he = NULL;
640 }
641
642 if (he) {
643 if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
644 if (info) *info += SPELL_FORBIDDEN;
645 return NULL;
646 }
647 if (root) {
648 *root = mystrdup(he->word);
649 if (*root && complexprefixes) {
650 if (utf8) reverseword_utf(*root); else reverseword(*root);
651 }
652 }
653 // try check compound word
654 } else if (pAMgr->get_compound()) {
655 he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);
656 // LANG_hu section: `moving rule' with last dash
657 if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {
658 char * dup = mystrdup(word);
659 if (!dup) return NULL;
660 dup[len-1] = '\0';
661 he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, info);
662 free(dup);
663 }
664 // end of LANG speficic region
665 if (he) {
666 if (root) {
667 *root = mystrdup(he->word);
668 if (*root && complexprefixes) {
669 if (utf8) reverseword_utf(*root); else reverseword(*root);
670 }
671 }
672 if (info) *info += SPELL_COMPOUND;
673 }
674 }
675
676 }
677
678 return he;
679 }
680
suggest(char *** slst,const char * word)681 int Hunspell::suggest(char*** slst, const char * word)
682 {
683 int onlycmpdsug = 0;
684 char cw[MAXWORDUTF8LEN];
685 char wspace[MAXWORDUTF8LEN];
686 if (!pSMgr || maxdic == 0) return 0;
687 w_char unicw[MAXWORDLEN];
688 *slst = NULL;
689 // process XML input of the simplified API (see manual)
690 if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
691 return spellml(slst, word);
692 }
693 int nc = strlen(word);
694 if (utf8) {
695 if (nc >= MAXWORDUTF8LEN) return 0;
696 } else {
697 if (nc >= MAXWORDLEN) return 0;
698 }
699 int captype = 0;
700 int abbv = 0;
701 int wl = 0;
702
703 // input conversion
704 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
705 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
706 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
707
708 if (wl == 0) return 0;
709 int ns = 0;
710 int capwords = 0;
711
712 // check capitalized form for FORCEUCASE
713 if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
714 int info = SPELL_ORIGCAP;
715 char ** wlst;
716 if (checkword(cw, &info, NULL)) {
717 if (*slst) {
718 wlst = *slst;
719 } else {
720 wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *));
721 if (wlst == NULL) return -1;
722 *slst = wlst;
723 for (int i = 0; i < MAXSUGGESTION; i++) {
724 wlst[i] = NULL;
725 }
726 }
727 wlst[0] = mystrdup(cw);
728 mkinitcap(wlst[0]);
729 return 1;
730 }
731 }
732
733 switch(captype) {
734 case NOCAP: {
735 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
736 break;
737 }
738
739 case INITCAP: {
740 capwords = 1;
741 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
742 if (ns == -1) break;
743 memcpy(wspace,cw,(wl+1));
744 mkallsmall2(wspace, unicw, nc);
745 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
746 break;
747 }
748 case HUHINITCAP:
749 capwords = 1;
750 case HUHCAP: {
751 ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
752 if (ns != -1) {
753 int prevns;
754 // something.The -> something. The
755 char * dot = strchr(cw, '.');
756 if (dot && (dot > cw)) {
757 int captype_;
758 if (utf8)
759 {
760 w_char w_[MAXWORDLEN];
761 int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
762 captype_ = get_captype_utf8(w_, wl_, langnum);
763 } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
764 if (captype_ == INITCAP)
765 {
766 char * st = mystrdup(cw);
767 if (st)
768 {
769 char *newst = (char *) realloc(st, wl + 2);
770 if (newst == NULL)
771 free(st);
772 st = newst;
773 }
774 if (st)
775 {
776 st[(dot - cw) + 1] = ' ';
777 strcpy(st + (dot - cw) + 2, dot + 1);
778 ns = insert_sug(slst, st, ns);
779 free(st);
780 }
781 }
782 }
783 if (captype == HUHINITCAP) {
784 // TheOpenOffice.org -> The OpenOffice.org
785 memcpy(wspace,cw,(wl+1));
786 mkinitsmall2(wspace, unicw, nc);
787 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
788 }
789 memcpy(wspace,cw,(wl+1));
790 mkallsmall2(wspace, unicw, nc);
791 if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
792 prevns = ns;
793 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
794 if (captype == HUHINITCAP) {
795 mkinitcap2(wspace, unicw, nc);
796 if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
797 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
798 }
799 // aNew -> "a New" (instead of "a new")
800 for (int j = prevns; j < ns; j++) {
801 char * space = strchr((*slst)[j],' ');
802 if (space) {
803 int slen = strlen(space + 1);
804 // different case after space (need capitalisation)
805 if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
806 w_char w[MAXWORDLEN];
807 int wc = 0;
808 char * r = (*slst)[j];
809 if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
810 mkinitcap2(space + 1, w, wc);
811 // set as first suggestion
812 for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
813 (*slst)[0] = r;
814 }
815 }
816 }
817 }
818 break;
819 }
820
821 case ALLCAP: {
822 memcpy(wspace, cw, (wl+1));
823 mkallsmall2(wspace, unicw, nc);
824 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
825 if (ns == -1) break;
826 if (pAMgr && pAMgr->get_keepcase() && spell(wspace))
827 ns = insert_sug(slst, wspace, ns);
828 mkinitcap2(wspace, unicw, nc);
829 ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
830 for (int j=0; j < ns; j++) {
831 mkallcap((*slst)[j]);
832 if (pAMgr && pAMgr->get_checksharps()) {
833 char * pos;
834 if (utf8) {
835 pos = strstr((*slst)[j], "\xC3\x9F");
836 while (pos) {
837 *pos = 'S';
838 *(pos+1) = 'S';
839 pos = strstr(pos+2, "\xC3\x9F");
840 }
841 } else {
842 pos = strchr((*slst)[j], '\xDF');
843 while (pos) {
844 (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
845 mystrrep((*slst)[j], "\xDF", "SS");
846 pos = strchr((*slst)[j], '\xDF');
847 }
848 }
849 }
850 }
851 break;
852 }
853 }
854
855 // LANG_hu section: replace '-' with ' ' in Hungarian
856 if (langnum == LANG_hu) {
857 for (int j=0; j < ns; j++) {
858 char * pos = strchr((*slst)[j],'-');
859 if (pos) {
860 int info;
861 char w[MAXWORDUTF8LEN];
862 *pos = '\0';
863 strcpy(w, (*slst)[j]);
864 strcat(w, pos + 1);
865 (void)spell(w, &info, NULL);
866 if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
867 *pos = ' ';
868 } else *pos = '-';
869 }
870 }
871 }
872 // END OF LANG_hu section
873
874 // try ngram approach since found nothing or only compound words
875 if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {
876 switch(captype) {
877 case NOCAP: {
878 ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
879 break;
880 }
881 case HUHINITCAP:
882 capwords = 1;
883 case HUHCAP: {
884 memcpy(wspace,cw,(wl+1));
885 mkallsmall2(wspace, unicw, nc);
886 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
887 break;
888 }
889 case INITCAP: {
890 capwords = 1;
891 memcpy(wspace,cw,(wl+1));
892 mkallsmall2(wspace, unicw, nc);
893 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
894 break;
895 }
896 case ALLCAP: {
897 memcpy(wspace,cw,(wl+1));
898 mkallsmall2(wspace, unicw, nc);
899 int oldns = ns;
900 ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
901 for (int j = oldns; j < ns; j++)
902 mkallcap((*slst)[j]);
903 break;
904 }
905 }
906 }
907
908 // try dash suggestion (Afo-American -> Afro-American)
909 if (char * pos = strchr(cw, '-')) {
910 char * ppos = cw;
911 int nodashsug = 1;
912 char ** nlst = NULL;
913 int nn = 0;
914 int last = 0;
915 if (*slst) {
916 for (int j = 0; j < ns && nodashsug == 1; j++) {
917 if (strchr((*slst)[j], '-')) nodashsug = 0;
918 }
919 }
920 while (nodashsug && !last) {
921 if (*pos == '\0') last = 1; else *pos = '\0';
922 if (!spell(ppos)) {
923 nn = suggest(&nlst, ppos);
924 for (int j = nn - 1; j >= 0; j--) {
925 strncpy(wspace, cw, ppos - cw);
926 strcpy(wspace + (ppos - cw), nlst[j]);
927 if (!last) {
928 strcat(wspace, "-");
929 strcat(wspace, pos + 1);
930 }
931 ns = insert_sug(slst, wspace, ns);
932 free(nlst[j]);
933 }
934 if (nlst != NULL) free(nlst);
935 nodashsug = 0;
936 }
937 if (!last) {
938 *pos = '-';
939 ppos = pos + 1;
940 pos = strchr(ppos, '-');
941 }
942 if (!pos) pos = cw + strlen(cw);
943 }
944 }
945
946 // word reversing wrapper for complex prefixes
947 if (complexprefixes) {
948 for (int j = 0; j < ns; j++) {
949 if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
950 }
951 }
952
953 // capitalize
954 if (capwords) for (int j=0; j < ns; j++) {
955 mkinitcap((*slst)[j]);
956 }
957
958 // expand suggestions with dot(s)
959 if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
960 for (int j = 0; j < ns; j++) {
961 (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
962 strcat((*slst)[j], word + strlen(word) - abbv);
963 }
964 }
965
966 // remove bad capitalized and forbidden forms
967 if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
968 switch (captype) {
969 case INITCAP:
970 case ALLCAP: {
971 int l = 0;
972 for (int j=0; j < ns; j++) {
973 if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {
974 char s[MAXSWUTF8L];
975 w_char w[MAXSWL];
976 int len;
977 if (utf8) {
978 len = u8_u16(w, MAXSWL, (*slst)[j]);
979 } else {
980 strcpy(s, (*slst)[j]);
981 len = strlen(s);
982 }
983 mkallsmall2(s, w, len);
984 free((*slst)[j]);
985 if (spell(s)) {
986 (*slst)[l] = mystrdup(s);
987 if ((*slst)[l]) l++;
988 } else {
989 mkinitcap2(s, w, len);
990 if (spell(s)) {
991 (*slst)[l] = mystrdup(s);
992 if ((*slst)[l]) l++;
993 }
994 }
995 } else {
996 (*slst)[l] = (*slst)[j];
997 l++;
998 }
999 }
1000 ns = l;
1001 }
1002 }
1003 }
1004
1005 // remove duplications
1006 int l = 0;
1007 for (int j = 0; j < ns; j++) {
1008 (*slst)[l] = (*slst)[j];
1009 for (int k = 0; k < l; k++) {
1010 if (strcmp((*slst)[k], (*slst)[j]) == 0) {
1011 free((*slst)[j]);
1012 l--;
1013 break;
1014 }
1015 }
1016 l++;
1017 }
1018 ns = l;
1019
1020 // output conversion
1021 rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1022 for (int j = 0; rl && j < ns; j++) {
1023 if (rl->conv((*slst)[j], wspace)) {
1024 free((*slst)[j]);
1025 (*slst)[j] = mystrdup(wspace);
1026 }
1027 }
1028
1029 // if suggestions removed by nosuggest, onlyincompound parameters
1030 if (l == 0 && *slst) {
1031 free(*slst);
1032 *slst = NULL;
1033 }
1034 return l;
1035 }
1036
free_list(char *** slst,int n)1037 void Hunspell::free_list(char *** slst, int n) {
1038 freelist(slst, n);
1039 }
1040
get_dic_encoding()1041 char * Hunspell::get_dic_encoding()
1042 {
1043 return encoding;
1044 }
1045
#ifdef HUNSPELL_EXPERIMENTAL
// XXX need UTF-8 support
// Experimental variant of suggest() built on SuggestMgr::suggest_auto().
//   slst  receives the suggestion list (NULL when there is none)
//   word  word to correct
// Returns the number of suggestions.
//
// Compared to the former version, *slst is reset before any early return,
// the realloc() result is checked before periods are re-appended, and the
// Hungarian dash probe only runs when the joined text fits into its buffer.
int Hunspell::suggest_auto(char*** slst, const char * word)
{
    char cw[MAXWORDUTF8LEN];
    char wspace[MAXWORDUTF8LEN];
    *slst = NULL; // HU, nsug in pSMgr->suggest
    if (!pSMgr || maxdic == 0) return 0;
    int wl = strlen(word);
    if (utf8) {
        if (wl >= MAXWORDUTF8LEN) return 0;
    } else {
        if (wl >= MAXWORDLEN) return 0;
    }
    int captype = 0;
    int abbv = 0;
    wl = cleanword(cw, word, &captype, &abbv);
    if (wl == 0) return 0;
    int ns = 0;

    switch(captype) {
        case NOCAP: {
            ns = pSMgr->suggest_auto(slst, cw, ns);
            break;
        }

        case INITCAP: {
            memcpy(wspace,cw,(wl+1));
            mkallsmall(wspace);
            ns = pSMgr->suggest_auto(slst, wspace, ns);
            for (int j=0; j < ns; j++)
                mkinitcap((*slst)[j]);
            ns = pSMgr->suggest_auto(slst, cw, ns);
            break;
        }

        case HUHINITCAP:
        case HUHCAP: {
            ns = pSMgr->suggest_auto(slst, cw, ns);
            if (ns == 0) {
                memcpy(wspace,cw,(wl+1));
                mkallsmall(wspace);
                ns = pSMgr->suggest_auto(slst, wspace, ns);
            }
            break;
        }

        case ALLCAP: {
            memcpy(wspace,cw,(wl+1));
            mkallsmall(wspace);
            ns = pSMgr->suggest_auto(slst, wspace, ns);

            mkinitcap(wspace);
            ns = pSMgr->suggest_auto(slst, wspace, ns);

            for (int j=0; j < ns; j++)
                mkallcap((*slst)[j]);
            break;
        }
    }

    // word reversing wrapper for complex prefixes
    if (complexprefixes) {
        for (int j = 0; j < ns; j++) {
            if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
        }
    }

    // expand suggestions with dot(s)
    if (abbv && pAMgr && pAMgr->get_sugswithdots() && ((size_t) abbv <= strlen(word))) {
        const char * dots = word + strlen(word) - abbv;
        for (int j = 0; j < ns; j++) {
            char * grown = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
            // on failure the suggestion is kept without the periods
            if (!grown) continue;
            (*slst)[j] = grown;
            strcat(grown, dots);
        }
    }

    // LANG_hu section: replace '-' with ' ' in Hungarian
    if (langnum == LANG_hu) {
        for (int j=0; j < ns; j++) {
            char * pos = strchr((*slst)[j],'-');
            if (pos) {
                int info = 0;
                char w[MAXWORDUTF8LEN];
                *pos = '\0';
                // only probe the joined form when it fits into w
                if (strlen((*slst)[j]) + strlen(pos + 1) < (size_t) MAXWORDUTF8LEN) {
                    strcpy(w, (*slst)[j]);
                    strcat(w, pos + 1);
                    spell(w, &info, NULL);
                }
                if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
                    *pos = ' ';
                } else *pos = '-';
            }
        }
    }
    // END OF LANG_hu section
    return ns;
}
#endif
1145
stem(char *** slst,char ** desc,int n)1146 int Hunspell::stem(char*** slst, char ** desc, int n)
1147 {
1148 char result[MAXLNLEN];
1149 char result2[MAXLNLEN];
1150 *slst = NULL;
1151 if (n == 0) return 0;
1152 *result2 = '\0';
1153 for (int i = 0; i < n; i++) {
1154 *result = '\0';
1155 // add compound word parts (except the last one)
1156 char * s = (char *) desc[i];
1157 char * part = strstr(s, MORPH_PART);
1158 if (part) {
1159 char * nextpart = strstr(part + 1, MORPH_PART);
1160 while (nextpart) {
1161 copy_field(result + strlen(result), part, MORPH_PART);
1162 part = nextpart;
1163 nextpart = strstr(part + 1, MORPH_PART);
1164 }
1165 s = part;
1166 }
1167
1168 char **pl;
1169 char tok[MAXLNLEN];
1170 strcpy(tok, s);
1171 char * alt = strstr(tok, " | ");
1172 while (alt) {
1173 alt[1] = MSEP_ALT;
1174 alt = strstr(alt, " | ");
1175 }
1176 int pln = line_tok(tok, &pl, MSEP_ALT);
1177 for (int k = 0; k < pln; k++) {
1178 // add derivational suffixes
1179 if (strstr(pl[k], MORPH_DERI_SFX)) {
1180 // remove inflectional suffixes
1181 char * is = strstr(pl[k], MORPH_INFL_SFX);
1182 if (is) *is = '\0';
1183 char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
1184 if (sg) {
1185 char ** gen;
1186 int genl = line_tok(sg, &gen, MSEP_REC);
1187 free(sg);
1188 for (int j = 0; j < genl; j++) {
1189 sprintf(result2 + strlen(result2), "%c%s%s",
1190 MSEP_REC, result, gen[j]);
1191 }
1192 freelist(&gen, genl);
1193 }
1194 } else {
1195 sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
1196 if (strstr(pl[k], MORPH_SURF_PFX)) {
1197 copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
1198 }
1199 copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
1200 }
1201 }
1202 freelist(&pl, pln);
1203 }
1204 int sln = line_tok(result2, slst, MSEP_REC);
1205 return uniqlist(*slst, sln);
1206
1207 }
1208
stem(char *** slst,const char * word)1209 int Hunspell::stem(char*** slst, const char * word)
1210 {
1211 char ** pl;
1212 int pln = analyze(&pl, word);
1213 int pln2 = stem(slst, pl, pln);
1214 freelist(&pl, pln);
1215 return pln2;
1216 }
1217
1218 #ifdef HUNSPELL_EXPERIMENTAL
suggest_pos_stems(char *** slst,const char * word)1219 int Hunspell::suggest_pos_stems(char*** slst, const char * word)
1220 {
1221 char cw[MAXWORDUTF8LEN];
1222 char wspace[MAXWORDUTF8LEN];
1223 if (! pSMgr || maxdic == 0) return 0;
1224 int wl = strlen(word);
1225 if (utf8) {
1226 if (wl >= MAXWORDUTF8LEN) return 0;
1227 } else {
1228 if (wl >= MAXWORDLEN) return 0;
1229 }
1230 int captype = 0;
1231 int abbv = 0;
1232 wl = cleanword(cw, word, &captype, &abbv);
1233 if (wl == 0) return 0;
1234
1235 int ns = 0; // ns=0 = normalized input
1236
1237 *slst = NULL; // HU, nsug in pSMgr->suggest
1238
1239 switch(captype) {
1240 case HUHCAP:
1241 case NOCAP: {
1242 ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1243
1244 if ((abbv) && (ns == 0)) {
1245 memcpy(wspace,cw,wl);
1246 *(wspace+wl) = '.';
1247 *(wspace+wl+1) = '\0';
1248 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1249 }
1250
1251 break;
1252 }
1253
1254 case INITCAP: {
1255
1256 ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1257
1258 if (ns == 0 || ((*slst)[0][0] == '#')) {
1259 memcpy(wspace,cw,(wl+1));
1260 mkallsmall(wspace);
1261 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1262 }
1263
1264 break;
1265
1266 }
1267
1268 case ALLCAP: {
1269 ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1270 if (ns != 0) break;
1271
1272 memcpy(wspace,cw,(wl+1));
1273 mkallsmall(wspace);
1274 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1275
1276 if (ns == 0) {
1277 mkinitcap(wspace);
1278 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1279 }
1280 break;
1281 }
1282 }
1283
1284 return ns;
1285 }
1286 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1287
get_wordchars()1288 const char * Hunspell::get_wordchars()
1289 {
1290 return pAMgr->get_wordchars();
1291 }
1292
get_wordchars_utf16(int * len)1293 unsigned short * Hunspell::get_wordchars_utf16(int * len)
1294 {
1295 return pAMgr->get_wordchars_utf16(len);
1296 }
1297
mkinitcap(char * p)1298 void Hunspell::mkinitcap(char * p)
1299 {
1300 if (!utf8) {
1301 if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1302 } else {
1303 int len;
1304 w_char u[MAXWORDLEN];
1305 len = u8_u16(u, MAXWORDLEN, p);
1306 unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1307 u[0].h = (unsigned char) (i >> 8);
1308 u[0].l = (unsigned char) (i & 0x00FF);
1309 u16_u8(p, MAXWORDUTF8LEN, u, len);
1310 }
1311 }
1312
mkinitcap2(char * p,w_char * u,int nc)1313 int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
1314 {
1315 if (!utf8) {
1316 if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1317 } else if (nc > 0) {
1318 unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1319 u[0].h = (unsigned char) (i >> 8);
1320 u[0].l = (unsigned char) (i & 0x00FF);
1321 u16_u8(p, MAXWORDUTF8LEN, u, nc);
1322 return strlen(p);
1323 }
1324 return nc;
1325 }
1326
mkinitsmall2(char * p,w_char * u,int nc)1327 int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
1328 {
1329 if (!utf8) {
1330 if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;
1331 } else if (nc > 0) {
1332 unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
1333 u[0].h = (unsigned char) (i >> 8);
1334 u[0].l = (unsigned char) (i & 0x00FF);
1335 u16_u8(p, MAXWORDUTF8LEN, u, nc);
1336 return strlen(p);
1337 }
1338 return nc;
1339 }
1340
add(const char * word)1341 int Hunspell::add(const char * word)
1342 {
1343 if (pHMgr[0]) return (pHMgr[0])->add(word);
1344 return 0;
1345 }
1346
add_with_affix(const char * word,const char * example)1347 int Hunspell::add_with_affix(const char * word, const char * example)
1348 {
1349 if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);
1350 return 0;
1351 }
1352
remove(const char * word)1353 int Hunspell::remove(const char * word)
1354 {
1355 if (pHMgr[0]) return (pHMgr[0])->remove(word);
1356 return 0;
1357 }
1358
get_version()1359 const char * Hunspell::get_version()
1360 {
1361 return pAMgr->get_version();
1362 }
1363
get_csconv()1364 struct cs_info * Hunspell::get_csconv()
1365 {
1366 return csconv;
1367 }
1368
cat_result(char * result,char * st)1369 void Hunspell::cat_result(char * result, char * st)
1370 {
1371 if (st) {
1372 if (*result) mystrcat(result, "\n", MAXLNLEN);
1373 mystrcat(result, st, MAXLNLEN);
1374 free(st);
1375 }
1376 }
1377
analyze(char *** slst,const char * word)1378 int Hunspell::analyze(char*** slst, const char * word)
1379 {
1380 char cw[MAXWORDUTF8LEN];
1381 char wspace[MAXWORDUTF8LEN];
1382 w_char unicw[MAXWORDLEN];
1383 int wl2 = 0;
1384 *slst = NULL;
1385 if (! pSMgr || maxdic == 0) return 0;
1386 int nc = strlen(word);
1387 if (utf8) {
1388 if (nc >= MAXWORDUTF8LEN) return 0;
1389 } else {
1390 if (nc >= MAXWORDLEN) return 0;
1391 }
1392 int captype = 0;
1393 int abbv = 0;
1394 int wl = 0;
1395
1396 // input conversion
1397 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1398 if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
1399 else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
1400
1401 if (wl == 0) {
1402 if (abbv) {
1403 for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
1404 cw[wl] = '\0';
1405 abbv = 0;
1406 } else return 0;
1407 }
1408
1409 char result[MAXLNLEN];
1410 char * st = NULL;
1411
1412 *result = '\0';
1413
1414 int n = 0;
1415 int n2 = 0;
1416 int n3 = 0;
1417
1418 // test numbers
1419 // LANG_hu section: set dash information for suggestions
1420 if (langnum == LANG_hu) {
1421 while ((n < wl) &&
1422 (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
1423 n++;
1424 if ((cw[n] == '.') || (cw[n] == ',')) {
1425 if (((n2 == 0) && (n > 3)) ||
1426 ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
1427 n2++;
1428 n3 = n;
1429 }
1430 }
1431
1432 if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;
1433 if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {
1434 mystrcat(result, cw, MAXLNLEN);
1435 result[n - 1] = '\0';
1436 if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));
1437 else {
1438 char sign = cw[n];
1439 cw[n] = '\0';
1440 cat_result(result, pSMgr->suggest_morph(cw + n - 1));
1441 mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE
1442 cw[n] = sign;
1443 cat_result(result, pSMgr->suggest_morph(cw + n));
1444 }
1445 return line_tok(result, slst, MSEP_REC);
1446 }
1447 }
1448 // END OF LANG_hu section
1449
1450 switch(captype) {
1451 case HUHCAP:
1452 case HUHINITCAP:
1453 case NOCAP: {
1454 cat_result(result, pSMgr->suggest_morph(cw));
1455 if (abbv) {
1456 memcpy(wspace,cw,wl);
1457 *(wspace+wl) = '.';
1458 *(wspace+wl+1) = '\0';
1459 cat_result(result, pSMgr->suggest_morph(wspace));
1460 }
1461 break;
1462 }
1463 case INITCAP: {
1464 wl = mkallsmall2(cw, unicw, nc);
1465 memcpy(wspace,cw,(wl+1));
1466 wl2 = mkinitcap2(cw, unicw, nc);
1467 cat_result(result, pSMgr->suggest_morph(wspace));
1468 cat_result(result, pSMgr->suggest_morph(cw));
1469 if (abbv) {
1470 *(wspace+wl) = '.';
1471 *(wspace+wl+1) = '\0';
1472 cat_result(result, pSMgr->suggest_morph(wspace));
1473
1474 memcpy(wspace, cw, wl2);
1475 *(wspace+wl2) = '.';
1476 *(wspace+wl2+1) = '\0';
1477
1478 cat_result(result, pSMgr->suggest_morph(wspace));
1479 }
1480 break;
1481 }
1482 case ALLCAP: {
1483 cat_result(result, pSMgr->suggest_morph(cw));
1484 if (abbv) {
1485 memcpy(wspace,cw,wl);
1486 *(wspace+wl) = '.';
1487 *(wspace+wl+1) = '\0';
1488 cat_result(result, pSMgr->suggest_morph(cw));
1489 }
1490 wl = mkallsmall2(cw, unicw, nc);
1491 memcpy(wspace,cw,(wl+1));
1492 wl2 = mkinitcap2(cw, unicw, nc);
1493
1494 cat_result(result, pSMgr->suggest_morph(wspace));
1495 cat_result(result, pSMgr->suggest_morph(cw));
1496 if (abbv) {
1497 *(wspace+wl) = '.';
1498 *(wspace+wl+1) = '\0';
1499 cat_result(result, pSMgr->suggest_morph(wspace));
1500
1501 memcpy(wspace, cw, wl2);
1502 *(wspace+wl2) = '.';
1503 *(wspace+wl2+1) = '\0';
1504
1505 cat_result(result, pSMgr->suggest_morph(wspace));
1506 }
1507 break;
1508 }
1509 }
1510
1511 if (*result) {
1512 // word reversing wrapper for complex prefixes
1513 if (complexprefixes) {
1514 if (utf8) reverseword_utf(result); else reverseword(result);
1515 }
1516 return line_tok(result, slst, MSEP_REC);
1517 }
1518
1519 // compound word with dash (HU) I18n
1520 char * dash = NULL;
1521 int nresult = 0;
1522 // LANG_hu section: set dash information for suggestions
1523 if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
1524 if ((langnum == LANG_hu) && dash) {
1525 *dash='\0';
1526 // examine 2 sides of the dash
1527 if (dash[1] == '\0') { // base word ending with dash
1528 if (spell(cw)) {
1529 char * p = pSMgr->suggest_morph(cw);
1530 if (p) {
1531 int ret = line_tok(p, slst, MSEP_REC);
1532 free(p);
1533 return ret;
1534 }
1535
1536 }
1537 } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
1538 if (spell(cw) && (spell("-e"))) {
1539 st = pSMgr->suggest_morph(cw);
1540 if (st) {
1541 mystrcat(result, st, MAXLNLEN);
1542 free(st);
1543 }
1544 mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
1545 st = pSMgr->suggest_morph("-e");
1546 if (st) {
1547 mystrcat(result, st, MAXLNLEN);
1548 free(st);
1549 }
1550 return line_tok(result, slst, MSEP_REC);
1551 }
1552 } else {
1553 // first word ending with dash: word- XXX ???
1554 char r2 = *(dash + 1);
1555 dash[0]='-';
1556 dash[1]='\0';
1557 nresult = spell(cw);
1558 dash[1] = r2;
1559 dash[0]='\0';
1560 if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
1561 ((dash[1] > '0') && (dash[1] < '9')))) {
1562 st = pSMgr->suggest_morph(cw);
1563 if (st) {
1564 mystrcat(result, st, MAXLNLEN);
1565 free(st);
1566 mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
1567 }
1568 st = pSMgr->suggest_morph(dash+1);
1569 if (st) {
1570 mystrcat(result, st, MAXLNLEN);
1571 free(st);
1572 }
1573 return line_tok(result, slst, MSEP_REC);
1574 }
1575 }
1576 // affixed number in correct word
1577 if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
1578 (*(dash-1)>='0')) || (*(dash-1)=='.'))) {
1579 *dash='-';
1580 n = 1;
1581 if (*(dash - n) == '.') n++;
1582 // search first not a number character to left from dash
1583 while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
1584 n++;
1585 }
1586 if ((dash - n) < cw) n--;
1587 // numbers: valami1000000-hoz
1588 // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1589 // 56-hoz, 6-hoz
1590 for(; n >= 1; n--) {
1591 if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
1592 mystrcat(result, cw, MAXLNLEN);
1593 result[dash - cw - n] = '\0';
1594 st = pSMgr->suggest_morph(dash - n);
1595 if (st) {
1596 mystrcat(result, st, MAXLNLEN);
1597 free(st);
1598 }
1599 return line_tok(result, slst, MSEP_REC);
1600 }
1601 }
1602 }
1603 }
1604 return 0;
1605 }
1606
generate(char *** slst,const char * word,char ** pl,int pln)1607 int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
1608 {
1609 *slst = NULL;
1610 if (!pSMgr || !pln) return 0;
1611 char **pl2;
1612 int pl2n = analyze(&pl2, word);
1613 int captype = 0;
1614 int abbv = 0;
1615 char cw[MAXWORDUTF8LEN];
1616 cleanword(cw, word, &captype, &abbv);
1617 char result[MAXLNLEN];
1618 *result = '\0';
1619
1620 for (int i = 0; i < pln; i++) {
1621 cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
1622 }
1623 freelist(&pl2, pl2n);
1624
1625 if (*result) {
1626 // allcap
1627 if (captype == ALLCAP) mkallcap(result);
1628
1629 // line split
1630 int linenum = line_tok(result, slst, MSEP_REC);
1631
1632 // capitalize
1633 if (captype == INITCAP || captype == HUHINITCAP) {
1634 for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);
1635 }
1636
1637 // temporary filtering of prefix related errors (eg.
1638 // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1639
1640 int r = 0;
1641 for (int j=0; j < linenum; j++) {
1642 if (!spell((*slst)[j])) {
1643 free((*slst)[j]);
1644 (*slst)[j] = NULL;
1645 } else {
1646 if (r < j) (*slst)[r] = (*slst)[j];
1647 r++;
1648 }
1649 }
1650 if (r > 0) return r;
1651 free(*slst);
1652 *slst = NULL;
1653 }
1654 return 0;
1655 }
1656
generate(char *** slst,const char * word,const char * pattern)1657 int Hunspell::generate(char*** slst, const char * word, const char * pattern)
1658 {
1659 char **pl;
1660 int pln = analyze(&pl, pattern);
1661 int n = generate(slst, word, pl, pln);
1662 freelist(&pl, pln);
1663 return uniqlist(*slst, n);
1664 }
1665
1666 // minimal XML parser functions
get_xml_par(char * dest,const char * par,int max)1667 int Hunspell::get_xml_par(char * dest, const char * par, int max)
1668 {
1669 char * d = dest;
1670 if (!par) return 0;
1671 char end = *par;
1672 char * dmax = dest + max;
1673 if (end == '>') end = '<';
1674 else if (end != '\'' && end != '"') return 0; // bad XML
1675 for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;
1676 *d = '\0';
1677 mystrrep(dest, "<", "<");
1678 mystrrep(dest, "&", "&");
1679 return (int)(d - dest);
1680 }
1681
get_langnum() const1682 int Hunspell::get_langnum() const
1683 {
1684 return langnum;
1685 }
1686
input_conv(const char * word,char * dest)1687 int Hunspell::input_conv(const char * word, char * dest)
1688 {
1689 RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1690 return (rl && rl->conv(word, dest));
1691 }
1692
1693
1694 // return the beginning of the element (attr == NULL) or the attribute
get_xml_pos(const char * s,const char * attr)1695 const char * Hunspell::get_xml_pos(const char * s, const char * attr)
1696 {
1697 const char * end = strchr(s, '>');
1698 const char * p = s;
1699 if (attr == NULL) return end;
1700 do {
1701 p = strstr(p, attr);
1702 if (!p || p >= end) return 0;
1703 } while (*(p-1) != ' ' && *(p-1) != '\n');
1704 return p + strlen(attr);
1705 }
1706
check_xml_par(const char * q,const char * attr,const char * value)1707 int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {
1708 char cw[MAXWORDUTF8LEN];
1709 if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&
1710 strcmp(cw, value) == 0) return 1;
1711 return 0;
1712 }
1713
get_xml_list(char *** slst,char * list,const char * tag)1714 int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
1715 int n = 0;
1716 char * p;
1717 if (!list) return 0;
1718 for (p = list; ((p = strstr(p, tag)) != NULL); p++) n++;
1719 if (n == 0) return 0;
1720 *slst = (char **) malloc(sizeof(char *) * n);
1721 if (!*slst) return 0;
1722 for (p = list, n = 0; ((p = strstr(p, tag)) != NULL); p++, n++) {
1723 int l = strlen(p);
1724 (*slst)[n] = (char *) malloc(l + 1);
1725 if (!(*slst)[n]) return n;
1726 if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {
1727 free((*slst)[n]);
1728 break;
1729 }
1730 }
1731 return n;
1732 }
1733
namespace
{
    /**
     * Replace every occurrence of `search` in `str` with `replace`, in place.
     *
     * Scanning resumes after each inserted replacement, so occurrences of
     * `search` inside `replace` are not replaced again. An empty `search`
     * leaves `str` untouched (it used to loop forever).
     */
    void myrep(std::string& str, const std::string& search, const std::string& replace)
    {
        if (search.empty()) return;

        size_t pos = 0;
        while ((pos = str.find(search, pos)) != std::string::npos)
        {
            str.replace(pos, search.length(), replace);
            pos += replace.length();
        }
    }
}
1746
spellml(char *** slst,const char * word)1747 int Hunspell::spellml(char*** slst, const char * word)
1748 {
1749 char *q, *q2;
1750 char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
1751 q = (char *) strstr(word, "<query");
1752 if (!q) return 0; // bad XML input
1753 q2 = strchr(q, '>');
1754 if (!q2) return 0; // bad XML input
1755 q2 = strstr(q2, "<word");
1756 if (!q2) return 0; // bad XML input
1757 if (check_xml_par(q, "type=", "analyze")) {
1758 int n = 0;
1759 if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
1760 if (n == 0) return 0;
1761 // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1762 std::string r;
1763 r.append("<code>");
1764 for (int i = 0; i < n; i++) {
1765 r.append("<a>");
1766
1767 std::string entry((*slst)[i]);
1768 free((*slst)[i]);
1769 myrep(entry, "\t", " ");
1770 myrep(entry, "&", "&");
1771 myrep(entry, "<", "<");
1772 r.append(entry);
1773
1774 r.append("</a>");
1775 }
1776 r.append("</code>");
1777 (*slst)[0] = mystrdup(r.c_str());
1778 return 1;
1779 } else if (check_xml_par(q, "type=", "stem")) {
1780 if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
1781 } else if (check_xml_par(q, "type=", "generate")) {
1782 int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
1783 if (n == 0) return 0;
1784 char * q3 = strstr(q2 + 1, "<word");
1785 if (q3) {
1786 if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
1787 return generate(slst, cw, cw2);
1788 }
1789 } else {
1790 if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
1791 char ** slst2;
1792 if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>")) != 0) {
1793 int n2 = generate(slst, cw, slst2, n);
1794 freelist(&slst2, n);
1795 return uniqlist(*slst, n2);
1796 }
1797 freelist(&slst2, n);
1798 }
1799 }
1800 }
1801 return 0;
1802 }
1803
1804
1805 #ifdef HUNSPELL_EXPERIMENTAL
1806 // XXX need UTF-8 support
morph_with_correction(const char * word)1807 char * Hunspell::morph_with_correction(const char * word)
1808 {
1809 char cw[MAXWORDUTF8LEN];
1810 char wspace[MAXWORDUTF8LEN];
1811 if (! pSMgr || maxdic == 0) return NULL;
1812 int wl = strlen(word);
1813 if (utf8) {
1814 if (wl >= MAXWORDUTF8LEN) return NULL;
1815 } else {
1816 if (wl >= MAXWORDLEN) return NULL;
1817 }
1818 int captype = 0;
1819 int abbv = 0;
1820 wl = cleanword(cw, word, &captype, &abbv);
1821 if (wl == 0) return NULL;
1822
1823 char result[MAXLNLEN];
1824 char * st = NULL;
1825
1826 *result = '\0';
1827
1828
1829 switch(captype) {
1830 case NOCAP: {
1831 st = pSMgr->suggest_morph_for_spelling_error(cw);
1832 if (st) {
1833 mystrcat(result, st, MAXLNLEN);
1834 free(st);
1835 }
1836 if (abbv) {
1837 memcpy(wspace,cw,wl);
1838 *(wspace+wl) = '.';
1839 *(wspace+wl+1) = '\0';
1840 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1841 if (st) {
1842 if (*result) mystrcat(result, "\n", MAXLNLEN);
1843 mystrcat(result, st, MAXLNLEN);
1844 free(st);
1845 }
1846 }
1847 break;
1848 }
1849 case INITCAP: {
1850 memcpy(wspace,cw,(wl+1));
1851 mkallsmall(wspace);
1852 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1853 if (st) {
1854 mystrcat(result, st, MAXLNLEN);
1855 free(st);
1856 }
1857 st = pSMgr->suggest_morph_for_spelling_error(cw);
1858 if (st) {
1859 if (*result) mystrcat(result, "\n", MAXLNLEN);
1860 mystrcat(result, st, MAXLNLEN);
1861 free(st);
1862 }
1863 if (abbv) {
1864 memcpy(wspace,cw,wl);
1865 *(wspace+wl) = '.';
1866 *(wspace+wl+1) = '\0';
1867 mkallsmall(wspace);
1868 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1869 if (st) {
1870 if (*result) mystrcat(result, "\n", MAXLNLEN);
1871 mystrcat(result, st, MAXLNLEN);
1872 free(st);
1873 }
1874 mkinitcap(wspace);
1875 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1876 if (st) {
1877 if (*result) mystrcat(result, "\n", MAXLNLEN);
1878 mystrcat(result, st, MAXLNLEN);
1879 free(st);
1880 }
1881 }
1882 break;
1883 }
1884 case HUHCAP: {
1885 st = pSMgr->suggest_morph_for_spelling_error(cw);
1886 if (st) {
1887 mystrcat(result, st, MAXLNLEN);
1888 free(st);
1889 }
1890 memcpy(wspace,cw,(wl+1));
1891 mkallsmall(wspace);
1892 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1893 if (st) {
1894 if (*result) mystrcat(result, "\n", MAXLNLEN);
1895 mystrcat(result, st, MAXLNLEN);
1896 free(st);
1897 }
1898 break;
1899 }
1900 case ALLCAP: {
1901 memcpy(wspace,cw,(wl+1));
1902 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1903 if (st) {
1904 mystrcat(result, st, MAXLNLEN);
1905 free(st);
1906 }
1907 mkallsmall(wspace);
1908 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1909 if (st) {
1910 if (*result) mystrcat(result, "\n", MAXLNLEN);
1911 mystrcat(result, st, MAXLNLEN);
1912 free(st);
1913 }
1914 mkinitcap(wspace);
1915 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1916 if (st) {
1917 if (*result) mystrcat(result, "\n", MAXLNLEN);
1918 mystrcat(result, st, MAXLNLEN);
1919 free(st);
1920 }
1921 if (abbv) {
1922 memcpy(wspace,cw,(wl+1));
1923 *(wspace+wl) = '.';
1924 *(wspace+wl+1) = '\0';
1925 if (*result) mystrcat(result, "\n", MAXLNLEN);
1926 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1927 if (st) {
1928 mystrcat(result, st, MAXLNLEN);
1929 free(st);
1930 }
1931 mkallsmall(wspace);
1932 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1933 if (st) {
1934 if (*result) mystrcat(result, "\n", MAXLNLEN);
1935 mystrcat(result, st, MAXLNLEN);
1936 free(st);
1937 }
1938 mkinitcap(wspace);
1939 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1940 if (st) {
1941 if (*result) mystrcat(result, "\n", MAXLNLEN);
1942 mystrcat(result, st, MAXLNLEN);
1943 free(st);
1944 }
1945 }
1946 break;
1947 }
1948 }
1949
1950 if (*result) return mystrdup(result);
1951 return NULL;
1952 }
1953
1954 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1955
Hunspell_create(const char * affpath,const char * dpath)1956 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
1957 {
1958 return (Hunhandle*)(new Hunspell(affpath, dpath));
1959 }
1960
Hunspell_create_key(const char * affpath,const char * dpath,const char * key)1961 Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
1962 const char * key)
1963 {
1964 return (Hunhandle*)(new Hunspell(affpath, dpath, key));
1965 }
1966
Hunspell_destroy(Hunhandle * pHunspell)1967 void Hunspell_destroy(Hunhandle *pHunspell)
1968 {
1969 delete (Hunspell*)(pHunspell);
1970 }
1971
Hunspell_spell(Hunhandle * pHunspell,const char * word)1972 int Hunspell_spell(Hunhandle *pHunspell, const char *word)
1973 {
1974 return ((Hunspell*)pHunspell)->spell(word);
1975 }
1976
Hunspell_get_dic_encoding(Hunhandle * pHunspell)1977 char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
1978 {
1979 return ((Hunspell*)pHunspell)->get_dic_encoding();
1980 }
1981
Hunspell_suggest(Hunhandle * pHunspell,char *** slst,const char * word)1982 int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
1983 {
1984 return ((Hunspell*)pHunspell)->suggest(slst, word);
1985 }
1986
Hunspell_analyze(Hunhandle * pHunspell,char *** slst,const char * word)1987 int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
1988 {
1989 return ((Hunspell*)pHunspell)->analyze(slst, word);
1990 }
1991
Hunspell_stem(Hunhandle * pHunspell,char *** slst,const char * word)1992 int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
1993 {
1994 return ((Hunspell*)pHunspell)->stem(slst, word);
1995 }
1996
Hunspell_stem2(Hunhandle * pHunspell,char *** slst,char ** desc,int n)1997 int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
1998 {
1999 return ((Hunspell*)pHunspell)->stem(slst, desc, n);
2000 }
2001
Hunspell_generate(Hunhandle * pHunspell,char *** slst,const char * word,const char * word2)2002 int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
2003 const char * word2)
2004 {
2005 return ((Hunspell*)pHunspell)->generate(slst, word, word2);
2006 }
2007
Hunspell_generate2(Hunhandle * pHunspell,char *** slst,const char * word,char ** desc,int n)2008 int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
2009 char** desc, int n)
2010 {
2011 return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
2012 }
2013
2014 /* functions for run-time modification of the dictionary */
2015
2016 /* add word to the run-time dictionary */
2017
Hunspell_add(Hunhandle * pHunspell,const char * word)2018 int Hunspell_add(Hunhandle *pHunspell, const char * word) {
2019 return ((Hunspell*)pHunspell)->add(word);
2020 }
2021
2022 /* add word to the run-time dictionary with affix flags of
2023 * the example (a dictionary word): Hunspell will recognize
2024 * affixed forms of the new word, too.
2025 */
2026
Hunspell_add_with_affix(Hunhandle * pHunspell,const char * word,const char * example)2027 int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
2028 const char * example) {
2029 return ((Hunspell*)pHunspell)->add_with_affix(word, example);
2030 }
2031
2032 /* remove word from the run-time dictionary */
2033
Hunspell_remove(Hunhandle * pHunspell,const char * word)2034 int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
2035 return ((Hunspell*)pHunspell)->remove(word);
2036 }
2037
Hunspell_free_list(Hunhandle *,char *** slst,int n)2038 void Hunspell_free_list(Hunhandle *, char *** slst, int n) {
2039 freelist(slst, n);
2040 }
2041