1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * Copyright (C) 2002-2017 Németh László
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17 *
18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23 *
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
35 *
36 * ***** END LICENSE BLOCK ***** */
37 /*
38 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39 * And Contributors. All rights reserved.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 *
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 *
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 *
52 * 3. All modifications to the source code must be clearly marked as
53 * such. Binary redistributions based on modified source code
54 * must be clearly marked as modified versions in the documentation
55 * and/or other materials provided with the distribution.
56 *
57 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
61 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 */
70
71 #include <stdlib.h>
72 #include <string.h>
73 #include <stdio.h>
74
75 #include "affixmgr.hxx"
76 #include "hunspell.hxx"
77 #include "suggestmgr.hxx"
78 #include "hunspell.h"
79 #include "csutil.hxx"
80
81 #include <limits>
82 #include <string>
83
84 #define MAXWORDUTF8LEN (MAXWORDLEN * 3)
85
86 class HunspellImpl
87 {
88 public:
89 HunspellImpl(const char* affpath, const char* dpath, const char* key);
90 ~HunspellImpl();
91 int add_dic(const char* dpath, const char* key);
92 std::vector<std::string> suffix_suggest(const std::string& root_word);
93 std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
94 std::vector<std::string> generate(const std::string& word, const std::string& pattern);
95 std::vector<std::string> stem(const std::string& word);
96 std::vector<std::string> stem(const std::vector<std::string>& morph);
97 std::vector<std::string> analyze(const std::string& word);
98 int get_langnum() const;
99 bool input_conv(const std::string& word, std::string& dest);
100 bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
101 std::vector<std::string> suggest(const std::string& word);
102 const std::string& get_wordchars() const;
103 const std::vector<w_char>& get_wordchars_utf16() const;
104 const std::string& get_dict_encoding() const;
105 int add(const std::string& word);
106 int add_with_affix(const std::string& word, const std::string& example);
107 int remove(const std::string& word);
108 const std::string& get_version() const;
109 struct cs_info* get_csconv();
110 std::vector<char> dic_encoding_vec;
111
112 private:
113 AffixMgr* pAMgr;
114 std::vector<HashMgr*> m_HMgrs;
115 SuggestMgr* pSMgr;
116 char* affixpath;
117 std::string encoding;
118 struct cs_info* csconv;
119 int langnum;
120 int utf8;
121 int complexprefixes;
122 std::vector<std::string> wordbreak;
123
124 private:
125 void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
126 size_t cleanword2(std::string& dest,
127 std::vector<w_char>& dest_u,
128 const std::string& src,
129 int* pcaptype,
130 size_t* pabbrev);
131 void mkinitcap(std::string& u8);
132 int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
133 int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
134 void mkallcap(std::string& u8);
135 int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
136 struct hentry* checkword(const std::string& source, int* info, std::string* root);
137 std::string sharps_u8_l1(const std::string& source);
138 hentry*
139 spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root);
140 int is_keepcase(const hentry* rv);
141 void insert_sug(std::vector<std::string>& slst, const std::string& word);
142 void cat_result(std::string& result, const std::string& st);
143 std::vector<std::string> spellml(const std::string& word);
144 std::string get_xml_par(const char* par);
145 const char* get_xml_pos(const char* s, const char* attr);
146 std::vector<std::string> get_xml_list(const char* list, const char* tag);
147 int check_xml_par(const char* q, const char* attr, const char* value);
148 private:
149 HunspellImpl(const HunspellImpl&);
150 HunspellImpl& operator=(const HunspellImpl&);
151 };
152
Hunspell(const char * affpath,const char * dpath,const char * key)153 Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
154 : m_Impl(new HunspellImpl(affpath, dpath, key)) {
155 }
156
HunspellImpl(const char * affpath,const char * dpath,const char * key)157 HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) {
158 csconv = NULL;
159 utf8 = 0;
160 complexprefixes = 0;
161 affixpath = mystrdup(affpath);
162
163 /* first set up the hash manager */
164 m_HMgrs.push_back(new HashMgr(dpath, affpath, key));
165
166 /* next set up the affix manager */
167 /* it needs access to the hash manager lookup methods */
168 pAMgr = new AffixMgr(affpath, m_HMgrs, key);
169
170 /* get the preferred try string and the dictionary */
171 /* encoding from the Affix Manager for that dictionary */
172 char* try_string = pAMgr->get_try_string();
173 encoding = pAMgr->get_encoding();
174 langnum = pAMgr->get_langnum();
175 utf8 = pAMgr->get_utf8();
176 if (!utf8)
177 csconv = get_current_cs(encoding);
178 complexprefixes = pAMgr->get_complexprefixes();
179 wordbreak = pAMgr->get_breaktable();
180
181 dic_encoding_vec.resize(encoding.size()+1);
182 strcpy(&dic_encoding_vec[0], encoding.c_str());
183
184 /* and finally set up the suggestion manager */
185 pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
186 if (try_string)
187 free(try_string);
188 }
189
~Hunspell()190 Hunspell::~Hunspell() {
191 delete m_Impl;
192 }
193
~HunspellImpl()194 HunspellImpl::~HunspellImpl() {
195 delete pSMgr;
196 delete pAMgr;
197 for (size_t i = 0; i < m_HMgrs.size(); ++i)
198 delete m_HMgrs[i];
199 pSMgr = NULL;
200 pAMgr = NULL;
201 #ifdef MOZILLA_CLIENT
202 delete[] csconv;
203 #endif
204 csconv = NULL;
205 if (affixpath)
206 free(affixpath);
207 affixpath = NULL;
208 }
209
210 // load extra dictionaries
add_dic(const char * dpath,const char * key)211 int Hunspell::add_dic(const char* dpath, const char* key) {
212 return m_Impl->add_dic(dpath, key);
213 }
214
215 // load extra dictionaries
add_dic(const char * dpath,const char * key)216 int HunspellImpl::add_dic(const char* dpath, const char* key) {
217 if (!affixpath)
218 return 1;
219 m_HMgrs.push_back(new HashMgr(dpath, affixpath, key));
220 return 0;
221 }
222
223 // make a copy of src at destination while removing all leading
224 // blanks and removing any trailing periods after recording
225 // their presence with the abbreviation flag
226 // also since already going through character by character,
227 // set the capitalization type
228 // return the length of the "cleaned" (and UTF-8 encoded) word
229
cleanword2(std::string & dest,std::vector<w_char> & dest_utf,const std::string & src,int * pcaptype,size_t * pabbrev)230 size_t HunspellImpl::cleanword2(std::string& dest,
231 std::vector<w_char>& dest_utf,
232 const std::string& src,
233 int* pcaptype,
234 size_t* pabbrev) {
235 dest.clear();
236 dest_utf.clear();
237
238 const char* q = src.c_str();
239
240 // first skip over any leading blanks
241 while (*q == ' ')
242 ++q;
243
244 // now strip off any trailing periods (recording their presence)
245 *pabbrev = 0;
246 int nl = strlen(q);
247 while ((nl > 0) && (*(q + nl - 1) == '.')) {
248 nl--;
249 (*pabbrev)++;
250 }
251
252 // if no characters are left it can't be capitalized
253 if (nl <= 0) {
254 *pcaptype = NOCAP;
255 return 0;
256 }
257
258 dest.append(q, nl);
259 nl = dest.size();
260 if (utf8) {
261 u8_u16(dest_utf, dest);
262 *pcaptype = get_captype_utf8(dest_utf, langnum);
263 } else {
264 *pcaptype = get_captype(dest, csconv);
265 }
266 return nl;
267 }
268
cleanword(std::string & dest,const std::string & src,int * pcaptype,int * pabbrev)269 void HunspellImpl::cleanword(std::string& dest,
270 const std::string& src,
271 int* pcaptype,
272 int* pabbrev) {
273 dest.clear();
274 const unsigned char* q = (const unsigned char*)src.c_str();
275 int firstcap = 0;
276
277 // first skip over any leading blanks
278 while (*q == ' ')
279 ++q;
280
281 // now strip off any trailing periods (recording their presence)
282 *pabbrev = 0;
283 int nl = strlen((const char*)q);
284 while ((nl > 0) && (*(q + nl - 1) == '.')) {
285 nl--;
286 (*pabbrev)++;
287 }
288
289 // if no characters are left it can't be capitalized
290 if (nl <= 0) {
291 *pcaptype = NOCAP;
292 return;
293 }
294
295 // now determine the capitalization type of the first nl letters
296 int ncap = 0;
297 int nneutral = 0;
298 int nc = 0;
299
300 if (!utf8) {
301 while (nl > 0) {
302 nc++;
303 if (csconv[(*q)].ccase)
304 ncap++;
305 if (csconv[(*q)].cupper == csconv[(*q)].clower)
306 nneutral++;
307 dest.push_back(*q++);
308 nl--;
309 }
310 // remember to terminate the destination string
311 firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
312 } else {
313 std::vector<w_char> t;
314 u8_u16(t, src);
315 for (size_t i = 0; i < t.size(); ++i) {
316 unsigned short idx = (t[i].h << 8) + t[i].l;
317 unsigned short low = unicodetolower(idx, langnum);
318 if (idx != low)
319 ncap++;
320 if (unicodetoupper(idx, langnum) == low)
321 nneutral++;
322 }
323 u16_u8(dest, t);
324 if (ncap) {
325 unsigned short idx = (t[0].h << 8) + t[0].l;
326 firstcap = (idx != unicodetolower(idx, langnum));
327 }
328 }
329
330 // now finally set the captype
331 if (ncap == 0) {
332 *pcaptype = NOCAP;
333 } else if ((ncap == 1) && firstcap) {
334 *pcaptype = INITCAP;
335 } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
336 *pcaptype = ALLCAP;
337 } else if ((ncap > 1) && firstcap) {
338 *pcaptype = HUHINITCAP;
339 } else {
340 *pcaptype = HUHCAP;
341 }
342 }
343
mkallcap(std::string & u8)344 void HunspellImpl::mkallcap(std::string& u8) {
345 if (utf8) {
346 std::vector<w_char> u16;
347 u8_u16(u16, u8);
348 ::mkallcap_utf(u16, langnum);
349 u16_u8(u8, u16);
350 } else {
351 ::mkallcap(u8, csconv);
352 }
353 }
354
mkallsmall2(std::string & u8,std::vector<w_char> & u16)355 int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
356 if (utf8) {
357 ::mkallsmall_utf(u16, langnum);
358 u16_u8(u8, u16);
359 } else {
360 ::mkallsmall(u8, csconv);
361 }
362 return u8.size();
363 }
364
365 // convert UTF-8 sharp S codes to latin 1
sharps_u8_l1(const std::string & source)366 std::string HunspellImpl::sharps_u8_l1(const std::string& source) {
367 std::string dest(source);
368 mystrrep(dest, "\xC3\x9F", "\xDF");
369 return dest;
370 }
371
372 // recursive search for right ss - sharp s permutations
spellsharps(std::string & base,size_t n_pos,int n,int repnum,int * info,std::string * root)373 hentry* HunspellImpl::spellsharps(std::string& base,
374 size_t n_pos,
375 int n,
376 int repnum,
377 int* info,
378 std::string* root) {
379 size_t pos = base.find("ss", n_pos);
380 if (pos != std::string::npos && (n < MAXSHARPS)) {
381 base[pos] = '\xC3';
382 base[pos + 1] = '\x9F';
383 hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root);
384 if (h)
385 return h;
386 base[pos] = 's';
387 base[pos + 1] = 's';
388 h = spellsharps(base, pos + 2, n + 1, repnum, info, root);
389 if (h)
390 return h;
391 } else if (repnum > 0) {
392 if (utf8)
393 return checkword(base, info, root);
394 std::string tmp(sharps_u8_l1(base));
395 return checkword(tmp, info, root);
396 }
397 return NULL;
398 }
399
is_keepcase(const hentry * rv)400 int HunspellImpl::is_keepcase(const hentry* rv) {
401 return pAMgr && rv->astr && pAMgr->get_keepcase() &&
402 TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
403 }
404
405 /* insert a word to the beginning of the suggestion array */
insert_sug(std::vector<std::string> & slst,const std::string & word)406 void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {
407 slst.insert(slst.begin(), word);
408 }
409
spell(const std::string & word,int * info,std::string * root)410 bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
411 return m_Impl->spell(word, info, root);
412 }
413
spell(const std::string & word,int * info,std::string * root)414 bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) {
415 struct hentry* rv = NULL;
416
417 int info2 = 0;
418 if (!info)
419 info = &info2;
420 else
421 *info = 0;
422
423 // Hunspell supports XML input of the simplified API (see manual)
424 if (word == SPELL_XML)
425 return true;
426 if (utf8) {
427 if (word.size() >= MAXWORDUTF8LEN)
428 return false;
429 } else {
430 if (word.size() >= MAXWORDLEN)
431 return false;
432 }
433 int captype = NOCAP;
434 size_t abbv = 0;
435 size_t wl = 0;
436
437 std::string scw;
438 std::vector<w_char> sunicw;
439
440 // input conversion
441 RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
442 {
443 std::string wspace;
444
445 bool convstatus = rl ? rl->conv(word, wspace) : false;
446 if (convstatus)
447 wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
448 else
449 wl = cleanword2(scw, sunicw, word, &captype, &abbv);
450 }
451
452 #ifdef MOZILLA_CLIENT
453 // accept the abbreviated words without dots
454 // workaround for the incomplete tokenization of Mozilla
455 abbv = 1;
456 #endif
457
458 if (wl == 0 || m_HMgrs.empty())
459 return true;
460 if (root)
461 root->clear();
462
463 // allow numbers with dots, dashes and commas (but forbid double separators:
464 // "..", "--" etc.)
465 enum { NBEGIN, NNUM, NSEP };
466 int nstate = NBEGIN;
467 size_t i;
468
469 for (i = 0; (i < wl); i++) {
470 if ((scw[i] <= '9') && (scw[i] >= '0')) {
471 nstate = NNUM;
472 } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
473 if ((nstate == NSEP) || (i == 0))
474 break;
475 nstate = NSEP;
476 } else
477 break;
478 }
479 if ((i == wl) && (nstate == NNUM))
480 return true;
481
482 switch (captype) {
483 case HUHCAP:
484 /* FALLTHROUGH */
485 case HUHINITCAP:
486 *info += SPELL_ORIGCAP;
487 /* FALLTHROUGH */
488 case NOCAP:
489 rv = checkword(scw, info, root);
490 if ((abbv) && !(rv)) {
491 std::string u8buffer(scw);
492 u8buffer.push_back('.');
493 rv = checkword(u8buffer, info, root);
494 }
495 break;
496 case ALLCAP: {
497 *info += SPELL_ORIGCAP;
498 rv = checkword(scw, info, root);
499 if (rv)
500 break;
501 if (abbv) {
502 std::string u8buffer(scw);
503 u8buffer.push_back('.');
504 rv = checkword(u8buffer, info, root);
505 if (rv)
506 break;
507 }
508 // Spec. prefix handling for Catalan, French, Italian:
509 // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
510 size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
511 if (apos != std::string::npos) {
512 mkallsmall2(scw, sunicw);
513 //conversion may result in string with different len to pre-mkallsmall2
514 //so re-scan
515 if (apos != std::string::npos && apos < scw.size() - 1) {
516 std::string part1 = scw.substr(0, apos+1);
517 std::string part2 = scw.substr(apos+1);
518 if (utf8) {
519 std::vector<w_char> part1u, part2u;
520 u8_u16(part1u, part1);
521 u8_u16(part2u, part2);
522 mkinitcap2(part2, part2u);
523 scw = part1 + part2;
524 sunicw = part1u;
525 sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
526 rv = checkword(scw, info, root);
527 if (rv)
528 break;
529 } else {
530 mkinitcap2(part2, sunicw);
531 scw = part1 + part2;
532 rv = checkword(scw, info, root);
533 if (rv)
534 break;
535 }
536 mkinitcap2(scw, sunicw);
537 rv = checkword(scw, info, root);
538 if (rv)
539 break;
540 }
541 }
542 if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {
543
544 mkallsmall2(scw, sunicw);
545 std::string u8buffer(scw);
546 rv = spellsharps(u8buffer, 0, 0, 0, info, root);
547 if (!rv) {
548 mkinitcap2(scw, sunicw);
549 rv = spellsharps(scw, 0, 0, 0, info, root);
550 }
551 if ((abbv) && !(rv)) {
552 u8buffer.push_back('.');
553 rv = spellsharps(u8buffer, 0, 0, 0, info, root);
554 if (!rv) {
555 u8buffer = std::string(scw);
556 u8buffer.push_back('.');
557 rv = spellsharps(u8buffer, 0, 0, 0, info, root);
558 }
559 }
560 if (rv)
561 break;
562 }
563 }
564 case INITCAP: {
565
566 *info += SPELL_ORIGCAP;
567 mkallsmall2(scw, sunicw);
568 std::string u8buffer(scw);
569 mkinitcap2(scw, sunicw);
570 if (captype == INITCAP)
571 *info += SPELL_INITCAP;
572 rv = checkword(scw, info, root);
573 if (captype == INITCAP)
574 *info -= SPELL_INITCAP;
575 // forbid bad capitalization
576 // (for example, ijs -> Ijs instead of IJs in Dutch)
577 // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
578 if (*info & SPELL_FORBIDDEN) {
579 rv = NULL;
580 break;
581 }
582 if (rv && is_keepcase(rv) && (captype == ALLCAP))
583 rv = NULL;
584 if (rv)
585 break;
586
587 rv = checkword(u8buffer, info, root);
588 if (abbv && !rv) {
589 u8buffer.push_back('.');
590 rv = checkword(u8buffer, info, root);
591 if (!rv) {
592 u8buffer = scw;
593 u8buffer.push_back('.');
594 if (captype == INITCAP)
595 *info += SPELL_INITCAP;
596 rv = checkword(u8buffer, info, root);
597 if (captype == INITCAP)
598 *info -= SPELL_INITCAP;
599 if (rv && is_keepcase(rv) && (captype == ALLCAP))
600 rv = NULL;
601 break;
602 }
603 }
604 if (rv && is_keepcase(rv) &&
605 ((captype == ALLCAP) ||
606 // if CHECKSHARPS: KEEPCASE words with \xDF are allowed
607 // in INITCAP form, too.
608 !(pAMgr->get_checksharps() &&
609 ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
610 (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
611 rv = NULL;
612 break;
613 }
614 }
615
616 if (rv) {
617 if (pAMgr && pAMgr->get_warn() && rv->astr &&
618 TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
619 *info += SPELL_WARN;
620 if (pAMgr->get_forbidwarn())
621 return false;
622 return true;
623 }
624 return true;
625 }
626
627 // recursive breaking at break points
628 if (!wordbreak.empty()) {
629
630 int nbr = 0;
631 wl = scw.size();
632
633 // calculate break points for recursion limit
634 for (size_t j = 0; j < wordbreak.size(); ++j) {
635 size_t pos = 0;
636 while ((pos = scw.find(wordbreak[j], pos)) != std::string::npos) {
637 ++nbr;
638 pos += wordbreak[j].size();
639 }
640 }
641 if (nbr >= 10)
642 return false;
643
644 // check boundary patterns (^begin and end$)
645 for (size_t j = 0; j < wordbreak.size(); ++j) {
646 size_t plen = wordbreak[j].size();
647 if (plen == 1 || plen > wl)
648 continue;
649
650 if (wordbreak[j][0] == '^' &&
651 scw.compare(0, plen - 1, wordbreak[j], 1, plen -1) == 0 && spell(scw.substr(plen - 1)))
652 return true;
653
654 if (wordbreak[j][plen - 1] == '$' &&
655 scw.compare(wl - plen + 1, plen - 1, wordbreak[j], 0, plen - 1) == 0) {
656 std::string suffix(scw.substr(wl - plen + 1));
657 scw.resize(wl - plen + 1);
658 if (spell(scw))
659 return true;
660 scw.append(suffix);
661 }
662 }
663
664 // other patterns
665 for (size_t j = 0; j < wordbreak.size(); ++j) {
666 size_t plen = wordbreak[j].size();
667 size_t found = scw.find(wordbreak[j]);
668 if ((found > 0) && (found < wl - plen)) {
669 if (!spell(scw.substr(found + plen)))
670 continue;
671 std::string suffix(scw.substr(found));
672 scw.resize(found);
673 // examine 2 sides of the break point
674 if (spell(scw))
675 return true;
676 scw.append(suffix);
677
678 // LANG_hu: spec. dash rule
679 if (langnum == LANG_hu && wordbreak[j] == "-") {
680 suffix = scw.substr(found + 1);
681 scw.resize(found + 1);
682 if (spell(scw))
683 return true; // check the first part with dash
684 scw.append(suffix);
685 }
686 // end of LANG specific region
687 }
688 }
689 }
690
691 return false;
692 }
693
checkword(const std::string & w,int * info,std::string * root)694 struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {
695 bool usebuffer = false;
696 std::string w2;
697 const char* word;
698 int len;
699
700 const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
701 if (ignoredchars != NULL) {
702 w2.assign(w);
703 if (utf8) {
704 const std::vector<w_char>& ignoredchars_utf16 =
705 pAMgr->get_ignore_utf16();
706 remove_ignored_chars_utf(w2, ignoredchars_utf16);
707 } else {
708 remove_ignored_chars(w2, ignoredchars);
709 }
710 word = w2.c_str();
711 len = w2.size();
712 usebuffer = true;
713 } else {
714 word = w.c_str();
715 len = w.size();
716 }
717
718 if (!len)
719 return NULL;
720
721 // word reversing wrapper for complex prefixes
722 if (complexprefixes) {
723 if (!usebuffer) {
724 w2.assign(word);
725 usebuffer = true;
726 }
727 if (utf8)
728 reverseword_utf(w2);
729 else
730 reverseword(w2);
731 }
732
733 if (usebuffer) {
734 word = w2.c_str();
735 }
736
737 // look word in hash table
738 struct hentry* he = NULL;
739 for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
740 he = m_HMgrs[i]->lookup(word);
741
742 // check forbidden and onlyincompound words
743 if ((he) && (he->astr) && (pAMgr) &&
744 TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
745 if (info)
746 *info += SPELL_FORBIDDEN;
747 // LANG_hu section: set dash information for suggestions
748 if (langnum == LANG_hu) {
749 if (pAMgr->get_compoundflag() &&
750 TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
751 if (info)
752 *info += SPELL_COMPOUND;
753 }
754 }
755 return NULL;
756 }
757
758 // he = next not needaffix, onlyincompound homonym or onlyupcase word
759 while (he && (he->astr) && pAMgr &&
760 ((pAMgr->get_needaffix() &&
761 TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
762 (pAMgr->get_onlyincompound() &&
763 TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
764 (info && (*info & SPELL_INITCAP) &&
765 TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))
766 he = he->next_homonym;
767 }
768
769 // check with affixes
770 if (!he && pAMgr) {
771 // try stripping off affixes */
772 he = pAMgr->affix_check(word, len, 0);
773
774 // check compound restriction and onlyupcase
775 if (he && he->astr &&
776 ((pAMgr->get_onlyincompound() &&
777 TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
778 (info && (*info & SPELL_INITCAP) &&
779 TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
780 he = NULL;
781 }
782
783 if (he) {
784 if ((he->astr) && (pAMgr) &&
785 TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
786 if (info)
787 *info += SPELL_FORBIDDEN;
788 return NULL;
789 }
790 if (root) {
791 root->assign(he->word);
792 if (complexprefixes) {
793 if (utf8)
794 reverseword_utf(*root);
795 else
796 reverseword(*root);
797 }
798 }
799 // try check compound word
800 } else if (pAMgr->get_compound()) {
801 struct hentry* rwords[100]; // buffer for COMPOUND pattern checking
802 he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info);
803 // LANG_hu section: `moving rule' with last dash
804 if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
805 std::string dup(word, len - 1);
806 he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info);
807 }
808 // end of LANG specific region
809 if (he) {
810 if (root) {
811 root->assign(he->word);
812 if (complexprefixes) {
813 if (utf8)
814 reverseword_utf(*root);
815 else
816 reverseword(*root);
817 }
818 }
819 if (info)
820 *info += SPELL_COMPOUND;
821 }
822 }
823 }
824
825 return he;
826 }
827
suggest(const std::string & word)828 std::vector<std::string> Hunspell::suggest(const std::string& word) {
829 return m_Impl->suggest(word);
830 }
831
suggest(const std::string & word)832 std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
833 std::vector<std::string> slst;
834
835 int onlycmpdsug = 0;
836 if (!pSMgr || m_HMgrs.empty())
837 return slst;
838
839 // process XML input of the simplified API (see manual)
840 if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
841 return spellml(word);
842 }
843 if (utf8) {
844 if (word.size() >= MAXWORDUTF8LEN)
845 return slst;
846 } else {
847 if (word.size() >= MAXWORDLEN)
848 return slst;
849 }
850 int captype = NOCAP;
851 size_t abbv = 0;
852 size_t wl = 0;
853
854 std::string scw;
855 std::vector<w_char> sunicw;
856
857 // input conversion
858 RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
859 {
860 std::string wspace;
861
862 bool convstatus = rl ? rl->conv(word, wspace) : false;
863 if (convstatus)
864 wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
865 else
866 wl = cleanword2(scw, sunicw, word, &captype, &abbv);
867
868 if (wl == 0)
869 return slst;
870 }
871
872 int capwords = 0;
873
874 // check capitalized form for FORCEUCASE
875 if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
876 int info = SPELL_ORIGCAP;
877 if (checkword(scw, &info, NULL)) {
878 std::string form(scw);
879 mkinitcap(form);
880 slst.push_back(form);
881 return slst;
882 }
883 }
884
885 switch (captype) {
886 case NOCAP: {
887 pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
888 break;
889 }
890
891 case INITCAP: {
892 capwords = 1;
893 pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
894 std::string wspace(scw);
895 mkallsmall2(wspace, sunicw);
896 pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
897 break;
898 }
899 case HUHINITCAP:
900 capwords = 1;
901 case HUHCAP: {
902 pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
903 // something.The -> something. The
904 size_t dot_pos = scw.find('.');
905 if (dot_pos != std::string::npos) {
906 std::string postdot = scw.substr(dot_pos + 1);
907 int captype_;
908 if (utf8) {
909 std::vector<w_char> postdotu;
910 u8_u16(postdotu, postdot);
911 captype_ = get_captype_utf8(postdotu, langnum);
912 } else {
913 captype_ = get_captype(postdot, csconv);
914 }
915 if (captype_ == INITCAP) {
916 std::string str(scw);
917 str.insert(dot_pos + 1, 1, ' ');
918 insert_sug(slst, str);
919 }
920 }
921
922 std::string wspace;
923
924 if (captype == HUHINITCAP) {
925 // TheOpenOffice.org -> The OpenOffice.org
926 wspace = scw;
927 mkinitsmall2(wspace, sunicw);
928 pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
929 }
930 wspace = scw;
931 mkallsmall2(wspace, sunicw);
932 if (spell(wspace.c_str()))
933 insert_sug(slst, wspace);
934 size_t prevns = slst.size();
935 pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
936 if (captype == HUHINITCAP) {
937 mkinitcap2(wspace, sunicw);
938 if (spell(wspace.c_str()))
939 insert_sug(slst, wspace);
940 pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
941 }
942 // aNew -> "a New" (instead of "a new")
943 for (size_t j = prevns; j < slst.size(); ++j) {
944 const char* space = strchr(slst[j].c_str(), ' ');
945 if (space) {
946 size_t slen = strlen(space + 1);
947 // different case after space (need capitalisation)
948 if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
949 std::string first(slst[j].c_str(), space + 1);
950 std::string second(space + 1);
951 std::vector<w_char> w;
952 if (utf8)
953 u8_u16(w, second);
954 mkinitcap2(second, w);
955 // set as first suggestion
956 slst.erase(slst.begin() + j);
957 slst.insert(slst.begin(), first + second);
958 }
959 }
960 }
961 break;
962 }
963
964 case ALLCAP: {
965 std::string wspace(scw);
966 mkallsmall2(wspace, sunicw);
967 pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
968 if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))
969 insert_sug(slst, wspace);
970 mkinitcap2(wspace, sunicw);
971 pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
972 for (size_t j = 0; j < slst.size(); ++j) {
973 mkallcap(slst[j]);
974 if (pAMgr && pAMgr->get_checksharps()) {
975 if (utf8) {
976 mystrrep(slst[j], "\xC3\x9F", "SS");
977 } else {
978 mystrrep(slst[j], "\xDF", "SS");
979 }
980 }
981 }
982 break;
983 }
984 }
985
986 // LANG_hu section: replace '-' with ' ' in Hungarian
987 if (langnum == LANG_hu) {
988 for (size_t j = 0; j < slst.size(); ++j) {
989 size_t pos = slst[j].find('-');
990 if (pos != std::string::npos) {
991 int info;
992 std::string w(slst[j].substr(0, pos));
993 w.append(slst[j].substr(pos + 1));
994 (void)spell(w, &info, NULL);
995 if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
996 slst[j][pos] = ' ';
997 } else
998 slst[j][pos] = '-';
999 }
1000 }
1001 }
1002 // END OF LANG_hu section
1003
1004 // try ngram approach since found nothing or only compound words
1005 if (pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
1006 switch (captype) {
1007 case NOCAP: {
1008 pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs);
1009 break;
1010 }
1011 case HUHINITCAP:
1012 capwords = 1;
1013 case HUHCAP: {
1014 std::string wspace(scw);
1015 mkallsmall2(wspace, sunicw);
1016 pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
1017 break;
1018 }
1019 case INITCAP: {
1020 capwords = 1;
1021 std::string wspace(scw);
1022 mkallsmall2(wspace, sunicw);
1023 pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
1024 break;
1025 }
1026 case ALLCAP: {
1027 std::string wspace(scw);
1028 mkallsmall2(wspace, sunicw);
1029 size_t oldns = slst.size();
1030 pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
1031 for (size_t j = oldns; j < slst.size(); ++j) {
1032 mkallcap(slst[j]);
1033 }
1034 break;
1035 }
1036 }
1037 }
1038
1039 // try dash suggestion (Afo-American -> Afro-American)
1040 size_t dash_pos = scw.find('-');
1041 if (dash_pos != std::string::npos) {
1042 int nodashsug = 1;
1043 for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {
1044 if (slst[j].find('-') != std::string::npos)
1045 nodashsug = 0;
1046 }
1047
1048 size_t prev_pos = 0;
1049 bool last = false;
1050
1051 while (nodashsug && !last) {
1052 if (dash_pos == scw.size())
1053 last = 1;
1054 std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
1055 if (!spell(chunk.c_str())) {
1056 std::vector<std::string> nlst = suggest(chunk.c_str());
1057 for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) {
1058 std::string wspace = scw.substr(0, prev_pos);
1059 wspace.append(*j);
1060 if (!last) {
1061 wspace.append("-");
1062 wspace.append(scw.substr(dash_pos + 1));
1063 }
1064 insert_sug(slst, wspace);
1065 }
1066 nodashsug = 0;
1067 }
1068 if (!last) {
1069 prev_pos = dash_pos + 1;
1070 dash_pos = scw.find('-', prev_pos);
1071 }
1072 if (dash_pos == std::string::npos)
1073 dash_pos = scw.size();
1074 }
1075 }
1076
1077 // word reversing wrapper for complex prefixes
1078 if (complexprefixes) {
1079 for (size_t j = 0; j < slst.size(); ++j) {
1080 if (utf8)
1081 reverseword_utf(slst[j]);
1082 else
1083 reverseword(slst[j]);
1084 }
1085 }
1086
1087 // capitalize
1088 if (capwords)
1089 for (size_t j = 0; j < slst.size(); ++j) {
1090 mkinitcap(slst[j]);
1091 }
1092
1093 // expand suggestions with dot(s)
1094 if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
1095 for (size_t j = 0; j < slst.size(); ++j) {
1096 slst[j].append(word.substr(word.size() - abbv));
1097 }
1098 }
1099
1100 // remove bad capitalized and forbidden forms
1101 if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
1102 switch (captype) {
1103 case INITCAP:
1104 case ALLCAP: {
1105 size_t l = 0;
1106 for (size_t j = 0; j < slst.size(); ++j) {
1107 if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
1108 std::string s;
1109 std::vector<w_char> w;
1110 if (utf8) {
1111 u8_u16(w, slst[j]);
1112 } else {
1113 s = slst[j];
1114 }
1115 mkallsmall2(s, w);
1116 if (spell(s)) {
1117 slst[l] = s;
1118 ++l;
1119 } else {
1120 mkinitcap2(s, w);
1121 if (spell(s)) {
1122 slst[l] = s;
1123 ++l;
1124 }
1125 }
1126 } else {
1127 slst[l] = slst[j];
1128 ++l;
1129 }
1130 }
1131 slst.resize(l);
1132 }
1133 }
1134 }
1135
1136 // remove duplications
1137 size_t l = 0;
1138 for (size_t j = 0; j < slst.size(); ++j) {
1139 slst[l] = slst[j];
1140 for (size_t k = 0; k < l; ++k) {
1141 if (slst[k] == slst[j]) {
1142 --l;
1143 break;
1144 }
1145 }
1146 ++l;
1147 }
1148 slst.resize(l);
1149
1150 // output conversion
1151 rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1152 for (size_t j = 0; rl && j < slst.size(); ++j) {
1153 std::string wspace;
1154 if (rl->conv(slst[j], wspace)) {
1155 slst[j] = wspace;
1156 }
1157 }
1158
1159 return slst;
1160 }
1161
get_dict_encoding() const1162 const std::string& Hunspell::get_dict_encoding() const {
1163 return m_Impl->get_dict_encoding();
1164 }
1165
get_dict_encoding() const1166 const std::string& HunspellImpl::get_dict_encoding() const {
1167 return encoding;
1168 }
1169
stem(const std::vector<std::string> & desc)1170 std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
1171 return m_Impl->stem(desc);
1172 }
1173
stem(const std::vector<std::string> & desc)1174 std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
1175 std::vector<std::string> slst;
1176
1177 std::string result2;
1178 if (desc.empty())
1179 return slst;
1180 for (size_t i = 0; i < desc.size(); ++i) {
1181
1182 std::string result;
1183
1184 // add compound word parts (except the last one)
1185 const char* s = desc[i].c_str();
1186 const char* part = strstr(s, MORPH_PART);
1187 if (part) {
1188 const char* nextpart = strstr(part + 1, MORPH_PART);
1189 while (nextpart) {
1190 std::string field;
1191 copy_field(field, part, MORPH_PART);
1192 result.append(field);
1193 part = nextpart;
1194 nextpart = strstr(part + 1, MORPH_PART);
1195 }
1196 s = part;
1197 }
1198
1199 std::string tok(s);
1200 size_t alt = 0;
1201 while ((alt = tok.find(" | ", alt)) != std::string::npos) {
1202 tok[alt + 1] = MSEP_ALT;
1203 }
1204 std::vector<std::string> pl = line_tok(tok, MSEP_ALT);
1205 for (size_t k = 0; k < pl.size(); ++k) {
1206 // add derivational suffixes
1207 if (pl[k].find(MORPH_DERI_SFX) != std::string::npos) {
1208 // remove inflectional suffixes
1209 const size_t is = pl[k].find(MORPH_INFL_SFX);
1210 if (is != std::string::npos)
1211 pl[k].resize(is);
1212 std::vector<std::string> singlepl;
1213 singlepl.push_back(pl[k]);
1214 std::string sg = pSMgr->suggest_gen(singlepl, pl[k]);
1215 if (!sg.empty()) {
1216 std::vector<std::string> gen = line_tok(sg, MSEP_REC);
1217 for (size_t j = 0; j < gen.size(); ++j) {
1218 result2.push_back(MSEP_REC);
1219 result2.append(result);
1220 result2.append(gen[j]);
1221 }
1222 }
1223 } else {
1224 result2.push_back(MSEP_REC);
1225 result2.append(result);
1226 if (pl[k].find(MORPH_SURF_PFX) != std::string::npos) {
1227 std::string field;
1228 copy_field(field, pl[k], MORPH_SURF_PFX);
1229 result2.append(field);
1230 }
1231 std::string field;
1232 copy_field(field, pl[k], MORPH_STEM);
1233 result2.append(field);
1234 }
1235 }
1236 }
1237 slst = line_tok(result2, MSEP_REC);
1238 uniqlist(slst);
1239 return slst;
1240 }
1241
stem(const std::string & word)1242 std::vector<std::string> Hunspell::stem(const std::string& word) {
1243 return m_Impl->stem(word);
1244 }
1245
stem(const std::string & word)1246 std::vector<std::string> HunspellImpl::stem(const std::string& word) {
1247 return stem(analyze(word));
1248 }
1249
get_wordchars() const1250 const char* Hunspell::get_wordchars() const {
1251 return m_Impl->get_wordchars().c_str();
1252 }
1253
get_wordchars_cpp() const1254 const std::string& Hunspell::get_wordchars_cpp() const {
1255 return m_Impl->get_wordchars();
1256 }
1257
get_wordchars() const1258 const std::string& HunspellImpl::get_wordchars() const {
1259 return pAMgr->get_wordchars();
1260 }
1261
get_wordchars_utf16() const1262 const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
1263 return m_Impl->get_wordchars_utf16();
1264 }
1265
get_wordchars_utf16() const1266 const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
1267 return pAMgr->get_wordchars_utf16();
1268 }
1269
mkinitcap(std::string & u8)1270 void HunspellImpl::mkinitcap(std::string& u8) {
1271 if (utf8) {
1272 std::vector<w_char> u16;
1273 u8_u16(u16, u8);
1274 ::mkinitcap_utf(u16, langnum);
1275 u16_u8(u8, u16);
1276 } else {
1277 ::mkinitcap(u8, csconv);
1278 }
1279 }
1280
mkinitcap2(std::string & u8,std::vector<w_char> & u16)1281 int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
1282 if (utf8) {
1283 ::mkinitcap_utf(u16, langnum);
1284 u16_u8(u8, u16);
1285 } else {
1286 ::mkinitcap(u8, csconv);
1287 }
1288 return u8.size();
1289 }
1290
mkinitsmall2(std::string & u8,std::vector<w_char> & u16)1291 int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
1292 if (utf8) {
1293 ::mkinitsmall_utf(u16, langnum);
1294 u16_u8(u8, u16);
1295 } else {
1296 ::mkinitsmall(u8, csconv);
1297 }
1298 return u8.size();
1299 }
1300
add(const std::string & word)1301 int Hunspell::add(const std::string& word) {
1302 return m_Impl->add(word);
1303 }
1304
add(const std::string & word)1305 int HunspellImpl::add(const std::string& word) {
1306 if (!m_HMgrs.empty())
1307 return m_HMgrs[0]->add(word);
1308 return 0;
1309 }
1310
add_with_affix(const std::string & word,const std::string & example)1311 int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
1312 return m_Impl->add_with_affix(word, example);
1313 }
1314
add_with_affix(const std::string & word,const std::string & example)1315 int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
1316 if (!m_HMgrs.empty())
1317 return m_HMgrs[0]->add_with_affix(word, example);
1318 return 0;
1319 }
1320
remove(const std::string & word)1321 int Hunspell::remove(const std::string& word) {
1322 return m_Impl->remove(word);
1323 }
1324
remove(const std::string & word)1325 int HunspellImpl::remove(const std::string& word) {
1326 if (!m_HMgrs.empty())
1327 return m_HMgrs[0]->remove(word);
1328 return 0;
1329 }
1330
get_version() const1331 const char* Hunspell::get_version() const {
1332 return m_Impl->get_version().c_str();
1333 }
1334
get_version_cpp() const1335 const std::string& Hunspell::get_version_cpp() const {
1336 return m_Impl->get_version();
1337 }
1338
get_version() const1339 const std::string& HunspellImpl::get_version() const {
1340 return pAMgr->get_version();
1341 }
1342
get_csconv()1343 struct cs_info* HunspellImpl::get_csconv() {
1344 return csconv;
1345 }
1346
get_csconv()1347 struct cs_info* Hunspell::get_csconv() {
1348 return m_Impl->get_csconv();
1349 }
1350
cat_result(std::string & result,const std::string & st)1351 void HunspellImpl::cat_result(std::string& result, const std::string& st) {
1352 if (!st.empty()) {
1353 if (!result.empty())
1354 result.append("\n");
1355 result.append(st);
1356 }
1357 }
1358
analyze(const std::string & word)1359 std::vector<std::string> Hunspell::analyze(const std::string& word) {
1360 return m_Impl->analyze(word);
1361 }
1362
analyze(const std::string & word)1363 std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
1364 std::vector<std::string> slst;
1365 if (!pSMgr || m_HMgrs.empty())
1366 return slst;
1367 if (utf8) {
1368 if (word.size() >= MAXWORDUTF8LEN)
1369 return slst;
1370 } else {
1371 if (word.size() >= MAXWORDLEN)
1372 return slst;
1373 }
1374 int captype = NOCAP;
1375 size_t abbv = 0;
1376 size_t wl = 0;
1377
1378 std::string scw;
1379 std::vector<w_char> sunicw;
1380
1381 // input conversion
1382 RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1383 {
1384 std::string wspace;
1385
1386 bool convstatus = rl ? rl->conv(word, wspace) : false;
1387 if (convstatus)
1388 wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
1389 else
1390 wl = cleanword2(scw, sunicw, word, &captype, &abbv);
1391 }
1392
1393 if (wl == 0) {
1394 if (abbv) {
1395 scw.clear();
1396 for (wl = 0; wl < abbv; wl++)
1397 scw.push_back('.');
1398 abbv = 0;
1399 } else
1400 return slst;
1401 }
1402
1403 std::string result;
1404
1405 size_t n = 0;
1406 // test numbers
1407 // LANG_hu section: set dash information for suggestions
1408 if (langnum == LANG_hu) {
1409 size_t n2 = 0;
1410 size_t n3 = 0;
1411
1412 while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
1413 (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
1414 n++;
1415 if ((scw[n] == '.') || (scw[n] == ',')) {
1416 if (((n2 == 0) && (n > 3)) ||
1417 ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
1418 break;
1419 n2++;
1420 n3 = n;
1421 }
1422 }
1423
1424 if ((n == wl) && (n3 > 0) && (n - n3 > 3))
1425 return slst;
1426 if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
1427 checkword(scw.substr(n), NULL, NULL))) {
1428 result.append(scw);
1429 result.resize(n - 1);
1430 if (n == wl)
1431 cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));
1432 else {
1433 std::string chunk = scw.substr(n - 1, 1);
1434 cat_result(result, pSMgr->suggest_morph(chunk));
1435 result.push_back('+'); // XXX SPEC. MORPHCODE
1436 cat_result(result, pSMgr->suggest_morph(scw.substr(n)));
1437 }
1438 return line_tok(result, MSEP_REC);
1439 }
1440 }
1441 // END OF LANG_hu section
1442
1443 switch (captype) {
1444 case HUHCAP:
1445 case HUHINITCAP:
1446 case NOCAP: {
1447 cat_result(result, pSMgr->suggest_morph(scw));
1448 if (abbv) {
1449 std::string u8buffer(scw);
1450 u8buffer.push_back('.');
1451 cat_result(result, pSMgr->suggest_morph(u8buffer));
1452 }
1453 break;
1454 }
1455 case INITCAP: {
1456 mkallsmall2(scw, sunicw);
1457 std::string u8buffer(scw);
1458 mkinitcap2(scw, sunicw);
1459 cat_result(result, pSMgr->suggest_morph(u8buffer));
1460 cat_result(result, pSMgr->suggest_morph(scw));
1461 if (abbv) {
1462 u8buffer.push_back('.');
1463 cat_result(result, pSMgr->suggest_morph(u8buffer));
1464
1465 u8buffer = scw;
1466 u8buffer.push_back('.');
1467
1468 cat_result(result, pSMgr->suggest_morph(u8buffer));
1469 }
1470 break;
1471 }
1472 case ALLCAP: {
1473 cat_result(result, pSMgr->suggest_morph(scw));
1474 if (abbv) {
1475 std::string u8buffer(scw);
1476 u8buffer.push_back('.');
1477 cat_result(result, pSMgr->suggest_morph(u8buffer));
1478 }
1479 mkallsmall2(scw, sunicw);
1480 std::string u8buffer(scw);
1481 mkinitcap2(scw, sunicw);
1482
1483 cat_result(result, pSMgr->suggest_morph(u8buffer));
1484 cat_result(result, pSMgr->suggest_morph(scw));
1485 if (abbv) {
1486 u8buffer.push_back('.');
1487 cat_result(result, pSMgr->suggest_morph(u8buffer));
1488
1489 u8buffer = scw;
1490 u8buffer.push_back('.');
1491
1492 cat_result(result, pSMgr->suggest_morph(u8buffer));
1493 }
1494 break;
1495 }
1496 }
1497
1498 if (!result.empty()) {
1499 // word reversing wrapper for complex prefixes
1500 if (complexprefixes) {
1501 if (utf8)
1502 reverseword_utf(result);
1503 else
1504 reverseword(result);
1505 }
1506 return line_tok(result, MSEP_REC);
1507 }
1508
1509 // compound word with dash (HU) I18n
1510 // LANG_hu section: set dash information for suggestions
1511
1512 size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
1513 if (dash_pos != std::string::npos) {
1514 int nresult = 0;
1515
1516 std::string part1 = scw.substr(0, dash_pos);
1517 std::string part2 = scw.substr(dash_pos+1);
1518
1519 // examine 2 sides of the dash
1520 if (part2.empty()) { // base word ending with dash
1521 if (spell(part1)) {
1522 std::string p = pSMgr->suggest_morph(part1);
1523 if (!p.empty()) {
1524 slst = line_tok(p, MSEP_REC);
1525 return slst;
1526 }
1527 }
1528 } else if (part2.size() == 1 && part2[0] == 'e') { // XXX (HU) -e hat.
1529 if (spell(part1) && (spell("-e"))) {
1530 std::string st = pSMgr->suggest_morph(part1);
1531 if (!st.empty()) {
1532 result.append(st);
1533 }
1534 result.push_back('+'); // XXX spec. separator in MORPHCODE
1535 st = pSMgr->suggest_morph("-e");
1536 if (!st.empty()) {
1537 result.append(st);
1538 }
1539 return line_tok(result, MSEP_REC);
1540 }
1541 } else {
1542 // first word ending with dash: word- XXX ???
1543 part1.push_back(' ');
1544 nresult = spell(part1);
1545 part1.erase(part1.size() - 1);
1546 if (nresult && spell(part2) &&
1547 ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
1548 std::string st = pSMgr->suggest_morph(part1);
1549 if (!st.empty()) {
1550 result.append(st);
1551 result.push_back('+'); // XXX spec. separator in MORPHCODE
1552 }
1553 st = pSMgr->suggest_morph(part2);
1554 if (!st.empty()) {
1555 result.append(st);
1556 }
1557 return line_tok(result, MSEP_REC);
1558 }
1559 }
1560 // affixed number in correct word
1561 if (nresult && (dash_pos > 0) &&
1562 (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
1563 (scw[dash_pos - 1] == '.'))) {
1564 n = 1;
1565 if (scw[dash_pos - n] == '.')
1566 n++;
1567 // search first not a number character to left from dash
1568 while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
1569 (n < 6)) {
1570 n++;
1571 }
1572 if (dash_pos < n)
1573 n--;
1574 // numbers: valami1000000-hoz
1575 // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1576 // 56-hoz, 6-hoz
1577 for (; n >= 1; n--) {
1578 if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
1579 continue;
1580 }
1581 std::string chunk = scw.substr(dash_pos - n);
1582 if (checkword(chunk, NULL, NULL)) {
1583 result.append(chunk);
1584 std::string st = pSMgr->suggest_morph(chunk);
1585 if (!st.empty()) {
1586 result.append(st);
1587 }
1588 return line_tok(result, MSEP_REC);
1589 }
1590 }
1591 }
1592 }
1593 return slst;
1594 }
1595
generate(const std::string & word,const std::vector<std::string> & pl)1596 std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
1597 return m_Impl->generate(word, pl);
1598 }
1599
generate(const std::string & word,const std::vector<std::string> & pl)1600 std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
1601 std::vector<std::string> slst;
1602 if (!pSMgr || pl.empty())
1603 return slst;
1604 std::vector<std::string> pl2 = analyze(word);
1605 int captype = NOCAP;
1606 int abbv = 0;
1607 std::string cw;
1608 cleanword(cw, word, &captype, &abbv);
1609 std::string result;
1610
1611 for (size_t i = 0; i < pl.size(); ++i) {
1612 cat_result(result, pSMgr->suggest_gen(pl2, pl[i]));
1613 }
1614
1615 if (!result.empty()) {
1616 // allcap
1617 if (captype == ALLCAP)
1618 mkallcap(result);
1619
1620 // line split
1621 slst = line_tok(result, MSEP_REC);
1622
1623 // capitalize
1624 if (captype == INITCAP || captype == HUHINITCAP) {
1625 for (size_t j = 0; j < slst.size(); ++j) {
1626 mkinitcap(slst[j]);
1627 }
1628 }
1629
1630 // temporary filtering of prefix related errors (eg.
1631 // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1632 std::vector<std::string>::iterator it = slst.begin();
1633 while (it != slst.end()) {
1634 if (!spell(*it)) {
1635 it = slst.erase(it);
1636 } else {
1637 ++it;
1638 }
1639 }
1640 }
1641 return slst;
1642 }
1643
generate(const std::string & word,const std::string & pattern)1644 std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
1645 return m_Impl->generate(word, pattern);
1646 }
1647
generate(const std::string & word,const std::string & pattern)1648 std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
1649 std::vector<std::string> pl = analyze(pattern);
1650 std::vector<std::string> slst = generate(word, pl);
1651 uniqlist(slst);
1652 return slst;
1653 }
1654
1655 // minimal XML parser functions
get_xml_par(const char * par)1656 std::string HunspellImpl::get_xml_par(const char* par) {
1657 std::string dest;
1658 if (!par)
1659 return dest;
1660 char end = *par;
1661 if (end == '>')
1662 end = '<';
1663 else if (end != '\'' && end != '"')
1664 return dest; // bad XML
1665 for (par++; *par != '\0' && *par != end; ++par) {
1666 dest.push_back(*par);
1667 }
1668 mystrrep(dest, "<", "<");
1669 mystrrep(dest, "&", "&");
1670 return dest;
1671 }
1672
get_langnum() const1673 int Hunspell::get_langnum() const {
1674 return m_Impl->get_langnum();
1675 }
1676
get_langnum() const1677 int HunspellImpl::get_langnum() const {
1678 return langnum;
1679 }
1680
input_conv(const std::string & word,std::string & dest)1681 bool Hunspell::input_conv(const std::string& word, std::string& dest) {
1682 return m_Impl->input_conv(word, dest);
1683 }
1684
input_conv(const char * word,char * dest,size_t destsize)1685 int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
1686 std::string d;
1687 bool ret = input_conv(word, d);
1688 if (ret && d.size() < destsize) {
1689 strncpy(dest, d.c_str(), destsize);
1690 return 1;
1691 }
1692 return 0;
1693 }
1694
input_conv(const std::string & word,std::string & dest)1695 bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
1696 RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
1697 if (rl) {
1698 return rl->conv(word, dest);
1699 }
1700 dest.assign(word);
1701 return false;
1702 }
1703
1704 // return the beginning of the element (attr == NULL) or the attribute
get_xml_pos(const char * s,const char * attr)1705 const char* HunspellImpl::get_xml_pos(const char* s, const char* attr) {
1706 const char* end = strchr(s, '>');
1707 if (attr == NULL)
1708 return end;
1709 const char* p = s;
1710 while (1) {
1711 p = strstr(p, attr);
1712 if (!p || p >= end)
1713 return 0;
1714 if (*(p - 1) == ' ' || *(p - 1) == '\n')
1715 break;
1716 p += strlen(attr);
1717 }
1718 return p + strlen(attr);
1719 }
1720
check_xml_par(const char * q,const char * attr,const char * value)1721 int HunspellImpl::check_xml_par(const char* q,
1722 const char* attr,
1723 const char* value) {
1724 std::string cw = get_xml_par(get_xml_pos(q, attr));
1725 if (cw == value)
1726 return 1;
1727 return 0;
1728 }
1729
get_xml_list(const char * list,const char * tag)1730 std::vector<std::string> HunspellImpl::get_xml_list(const char* list, const char* tag) {
1731 std::vector<std::string> slst;
1732 if (!list)
1733 return slst;
1734 const char* p = list;
1735 for (size_t n = 0; ((p = strstr(p, tag)) != NULL); ++p, ++n) {
1736 std::string cw = get_xml_par(p + strlen(tag) - 1);
1737 if (cw.empty()) {
1738 break;
1739 }
1740 slst.push_back(cw);
1741 }
1742 return slst;
1743 }
1744
spellml(const std::string & in_word)1745 std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
1746 std::vector<std::string> slst;
1747
1748 const char* word = in_word.c_str();
1749
1750 const char* q = strstr(word, "<query");
1751 if (!q)
1752 return slst; // bad XML input
1753 const char* q2 = strchr(q, '>');
1754 if (!q2)
1755 return slst; // bad XML input
1756 q2 = strstr(q2, "<word");
1757 if (!q2)
1758 return slst; // bad XML input
1759 if (check_xml_par(q, "type=", "analyze")) {
1760 std::string cw = get_xml_par(strchr(q2, '>'));
1761 if (!cw.empty())
1762 slst = analyze(cw);
1763 if (slst.empty())
1764 return slst;
1765 // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1766 std::string r;
1767 r.append("<code>");
1768 for (size_t i = 0; i < slst.size(); ++i) {
1769 r.append("<a>");
1770
1771 std::string entry(slst[i]);
1772 mystrrep(entry, "\t", " ");
1773 mystrrep(entry, "&", "&");
1774 mystrrep(entry, "<", "<");
1775 r.append(entry);
1776
1777 r.append("</a>");
1778 }
1779 r.append("</code>");
1780 slst.clear();
1781 slst.push_back(r);
1782 return slst;
1783 } else if (check_xml_par(q, "type=", "stem")) {
1784 std::string cw = get_xml_par(strchr(q2, '>'));
1785 if (!cw.empty())
1786 return stem(cw);
1787 } else if (check_xml_par(q, "type=", "generate")) {
1788 std::string cw = get_xml_par(strchr(q2, '>'));
1789 if (cw.empty())
1790 return slst;
1791 const char* q3 = strstr(q2 + 1, "<word");
1792 if (q3) {
1793 std::string cw2 = get_xml_par(strchr(q3, '>'));
1794 if (!cw2.empty()) {
1795 return generate(cw, cw2);
1796 }
1797 } else {
1798 if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
1799 std::vector<std::string> slst2 = get_xml_list(strchr(q2, '>'), "<a>");
1800 if (!slst2.empty()) {
1801 slst = generate(cw, slst2);
1802 uniqlist(slst);
1803 return slst;
1804 }
1805 }
1806 }
1807 }
1808 return slst;
1809 }
1810
spell(const char * word,int * info,char ** root)1811 int Hunspell::spell(const char* word, int* info, char** root) {
1812 std::string sroot;
1813 bool ret = m_Impl->spell(word, info, root ? &sroot : NULL);
1814 if (root) {
1815 if (sroot.empty()) {
1816 *root = NULL;
1817 } else {
1818 *root = mystrdup(sroot.c_str());
1819 }
1820 }
1821 return ret;
1822 }
1823
1824 namespace {
munge_vector(char *** slst,const std::vector<std::string> & items)1825 int munge_vector(char*** slst, const std::vector<std::string>& items) {
1826 if (items.empty()) {
1827 *slst = NULL;
1828 return 0;
1829 } else {
1830 *slst = (char**)malloc(sizeof(char*) * items.size());
1831 if (!*slst)
1832 return 0;
1833 for (size_t i = 0; i < items.size(); ++i)
1834 (*slst)[i] = mystrdup(items[i].c_str());
1835 }
1836 return items.size();
1837 }
1838 }
1839
free_list(char *** slst,int n)1840 void Hunspell::free_list(char*** slst, int n) {
1841 Hunspell_free_list((Hunhandle*)(this), slst, n);
1842 }
1843
suggest(char *** slst,const char * word)1844 int Hunspell::suggest(char*** slst, const char* word) {
1845 return Hunspell_suggest((Hunhandle*)(this), slst, word);
1846 }
1847
suffix_suggest(char *** slst,const char * root_word)1848 int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
1849 std::vector<std::string> stems = m_Impl->suffix_suggest(root_word);
1850 return munge_vector(slst, stems);
1851 }
1852
get_dic_encoding()1853 char* Hunspell::get_dic_encoding() {
1854 return &(m_Impl->dic_encoding_vec[0]);
1855 }
1856
stem(char *** slst,char ** desc,int n)1857 int Hunspell::stem(char*** slst, char** desc, int n) {
1858 return Hunspell_stem2((Hunhandle*)(this), slst, desc, n);
1859 }
1860
stem(char *** slst,const char * word)1861 int Hunspell::stem(char*** slst, const char* word) {
1862 return Hunspell_stem((Hunhandle*)(this), slst, word);
1863 }
1864
analyze(char *** slst,const char * word)1865 int Hunspell::analyze(char*** slst, const char* word) {
1866 return Hunspell_analyze((Hunhandle*)(this), slst, word);
1867 }
1868
generate(char *** slst,const char * word,char ** pl,int pln)1869 int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
1870 return Hunspell_generate2((Hunhandle*)(this), slst, word, pl, pln);
1871 }
1872
generate(char *** slst,const char * word,const char * pattern)1873 int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
1874 return Hunspell_generate((Hunhandle*)(this), slst, word, pattern);
1875 }
1876
Hunspell_create(const char * affpath,const char * dpath)1877 Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
1878 return (Hunhandle*)(new Hunspell(affpath, dpath));
1879 }
1880
Hunspell_create_key(const char * affpath,const char * dpath,const char * key)1881 Hunhandle* Hunspell_create_key(const char* affpath,
1882 const char* dpath,
1883 const char* key) {
1884 return reinterpret_cast<Hunhandle*>(new Hunspell(affpath, dpath, key));
1885 }
1886
Hunspell_destroy(Hunhandle * pHunspell)1887 void Hunspell_destroy(Hunhandle* pHunspell) {
1888 delete reinterpret_cast<Hunspell*>(pHunspell);
1889 }
1890
Hunspell_add_dic(Hunhandle * pHunspell,const char * dpath)1891 int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
1892 return reinterpret_cast<Hunspell*>(pHunspell)->add_dic(dpath);
1893 }
1894
Hunspell_spell(Hunhandle * pHunspell,const char * word)1895 int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
1896 return reinterpret_cast<Hunspell*>(pHunspell)->spell(std::string(word));
1897 }
1898
Hunspell_get_dic_encoding(Hunhandle * pHunspell)1899 char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
1900 return reinterpret_cast<Hunspell*>(pHunspell)->get_dic_encoding();
1901 }
1902
Hunspell_suggest(Hunhandle * pHunspell,char *** slst,const char * word)1903 int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
1904 std::vector<std::string> suggests = reinterpret_cast<Hunspell*>(pHunspell)->suggest(word);
1905 return munge_vector(slst, suggests);
1906 }
1907
Hunspell_analyze(Hunhandle * pHunspell,char *** slst,const char * word)1908 int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
1909 std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->analyze(word);
1910 return munge_vector(slst, stems);
1911 }
1912
Hunspell_stem(Hunhandle * pHunspell,char *** slst,const char * word)1913 int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
1914
1915 std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(word);
1916 return munge_vector(slst, stems);
1917 }
1918
Hunspell_stem2(Hunhandle * pHunspell,char *** slst,char ** desc,int n)1919 int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
1920 std::vector<std::string> morph;
1921 for (int i = 0; i < n; ++i)
1922 morph.push_back(desc[i]);
1923
1924 std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(morph);
1925 return munge_vector(slst, stems);
1926 }
1927
Hunspell_generate(Hunhandle * pHunspell,char *** slst,const char * word,const char * pattern)1928 int Hunspell_generate(Hunhandle* pHunspell,
1929 char*** slst,
1930 const char* word,
1931 const char* pattern) {
1932 std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, pattern);
1933 return munge_vector(slst, stems);
1934 }
1935
Hunspell_generate2(Hunhandle * pHunspell,char *** slst,const char * word,char ** desc,int n)1936 int Hunspell_generate2(Hunhandle* pHunspell,
1937 char*** slst,
1938 const char* word,
1939 char** desc,
1940 int n) {
1941 std::vector<std::string> morph;
1942 for (int i = 0; i < n; ++i)
1943 morph.push_back(desc[i]);
1944
1945 std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, morph);
1946 return munge_vector(slst, stems);
1947 }
1948
1949 /* functions for run-time modification of the dictionary */
1950
1951 /* add word to the run-time dictionary */
1952
Hunspell_add(Hunhandle * pHunspell,const char * word)1953 int Hunspell_add(Hunhandle* pHunspell, const char* word) {
1954 return reinterpret_cast<Hunspell*>(pHunspell)->add(word);
1955 }
1956
1957 /* add word to the run-time dictionary with affix flags of
1958 * the example (a dictionary word): Hunspell will recognize
1959 * affixed forms of the new word, too.
1960 */
1961
Hunspell_add_with_affix(Hunhandle * pHunspell,const char * word,const char * example)1962 int Hunspell_add_with_affix(Hunhandle* pHunspell,
1963 const char* word,
1964 const char* example) {
1965 return reinterpret_cast<Hunspell*>(pHunspell)->add_with_affix(word, example);
1966 }
1967
1968 /* remove word from the run-time dictionary */
1969
Hunspell_remove(Hunhandle * pHunspell,const char * word)1970 int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
1971 return reinterpret_cast<Hunspell*>(pHunspell)->remove(word);
1972 }
1973
Hunspell_free_list(Hunhandle *,char *** list,int n)1974 void Hunspell_free_list(Hunhandle*, char*** list, int n) {
1975 if (list && *list) {
1976 for (int i = 0; i < n; i++)
1977 free((*list)[i]);
1978 free(*list);
1979 *list = NULL;
1980 }
1981 }
1982
suffix_suggest(const std::string & root_word)1983 std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
1984 return m_Impl->suffix_suggest(root_word);
1985 }
1986
suffix_suggest(const std::string & root_word)1987 std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
1988 std::vector<std::string> slst;
1989 struct hentry* he = NULL;
1990 int len;
1991 std::string w2;
1992 const char* word;
1993 const char* ignoredchars = pAMgr->get_ignore();
1994 if (ignoredchars != NULL) {
1995 w2.assign(root_word);
1996 if (utf8) {
1997 const std::vector<w_char>& ignoredchars_utf16 =
1998 pAMgr->get_ignore_utf16();
1999 remove_ignored_chars_utf(w2, ignoredchars_utf16);
2000 } else {
2001 remove_ignored_chars(w2, ignoredchars);
2002 }
2003 word = w2.c_str();
2004 } else
2005 word = root_word.c_str();
2006
2007 len = strlen(word);
2008
2009 if (!len)
2010 return slst;
2011
2012 for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
2013 he = m_HMgrs[i]->lookup(word);
2014 }
2015 if (he) {
2016 slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str());
2017 }
2018 return slst;
2019 }
2020