1 /* ***** BEGIN LICENSE BLOCK *****
2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3  *
4  * Copyright (C) 2002-2017 Németh László
5  *
6  * The contents of this file are subject to the Mozilla Public License Version
7  * 1.1 (the "License"); you may not use this file except in compliance with
8  * the License. You may obtain a copy of the License at
9  * http://www.mozilla.org/MPL/
10  *
11  * Software distributed under the License is distributed on an "AS IS" basis,
12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13  * for the specific language governing rights and limitations under the
14  * License.
15  *
16  * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17  *
18  * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19  * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20  * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21  * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22  * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23  *
24  * Alternatively, the contents of this file may be used under the terms of
25  * either the GNU General Public License Version 2 or later (the "GPL"), or
26  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27  * in which case the provisions of the GPL or the LGPL are applicable instead
28  * of those above. If you wish to allow use of your version of this file only
29  * under the terms of either the GPL or the LGPL, and not to allow others to
30  * use your version of this file under the terms of the MPL, indicate your
31  * decision by deleting the provisions above and replace them with the notice
32  * and other provisions required by the GPL or the LGPL. If you do not delete
33  * the provisions above, a recipient may use your version of this file under
34  * the terms of any one of the MPL, the GPL or the LGPL.
35  *
36  * ***** END LICENSE BLOCK ***** */
37 /*
38  * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39  * And Contributors.  All rights reserved.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  *
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  *
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  *
52  * 3. All modifications to the source code must be clearly marked as
53  *    such.  Binary redistributions based on modified source code
54  *    must be clearly marked as modified versions in the documentation
55  *    and/or other materials provided with the distribution.
56  *
57  * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
61  * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68  * SUCH DAMAGE.
69  */
70 
71 #include <stdlib.h>
72 #include <string.h>
73 #include <stdio.h>
74 
75 #include "affixmgr.hxx"
76 #include "hunspell.hxx"
77 #include "suggestmgr.hxx"
78 #include "hunspell.h"
79 #include "csutil.hxx"
80 
81 #include <limits>
82 #include <string>
83 
// Size cutoff for input words when the dictionary is UTF-8: spell() and
// suggest() reject any word whose byte size is >= this value (non-UTF-8
// dictionaries are compared against MAXWORDLEN directly).
// NOTE(review): the factor 3 presumably allows for up to three UTF-8 bytes
// per character -- confirm against the MAXWORDLEN definition.
#define MAXWORDUTF8LEN (MAXWORDLEN * 3)
85 
86 class HunspellImpl
87 {
88 public:
89   HunspellImpl(const char* affpath, const char* dpath, const char* key);
90   ~HunspellImpl();
91   int add_dic(const char* dpath, const char* key);
92   std::vector<std::string> suffix_suggest(const std::string& root_word);
93   std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
94   std::vector<std::string> generate(const std::string& word, const std::string& pattern);
95   std::vector<std::string> stem(const std::string& word);
96   std::vector<std::string> stem(const std::vector<std::string>& morph);
97   std::vector<std::string> analyze(const std::string& word);
98   int get_langnum() const;
99   bool input_conv(const std::string& word, std::string& dest);
100   bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
101   std::vector<std::string> suggest(const std::string& word);
102   const std::string& get_wordchars() const;
103   const std::vector<w_char>& get_wordchars_utf16() const;
104   const std::string& get_dict_encoding() const;
105   int add(const std::string& word);
106   int add_with_affix(const std::string& word, const std::string& example);
107   int remove(const std::string& word);
108   const std::string& get_version() const;
109   struct cs_info* get_csconv();
110   std::vector<char> dic_encoding_vec;
111 
112 private:
113   AffixMgr* pAMgr;
114   std::vector<HashMgr*> m_HMgrs;
115   SuggestMgr* pSMgr;
116   char* affixpath;
117   std::string encoding;
118   struct cs_info* csconv;
119   int langnum;
120   int utf8;
121   int complexprefixes;
122   std::vector<std::string> wordbreak;
123 
124 private:
125   void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
126   size_t cleanword2(std::string& dest,
127                     std::vector<w_char>& dest_u,
128                     const std::string& src,
129                     int* pcaptype,
130                     size_t* pabbrev);
131   void mkinitcap(std::string& u8);
132   int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
133   int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
134   void mkallcap(std::string& u8);
135   int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
136   struct hentry* checkword(const std::string& source, int* info, std::string* root);
137   std::string sharps_u8_l1(const std::string& source);
138   hentry*
139   spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root);
140   int is_keepcase(const hentry* rv);
141   void insert_sug(std::vector<std::string>& slst, const std::string& word);
142   void cat_result(std::string& result, const std::string& st);
143   std::vector<std::string> spellml(const std::string& word);
144   std::string get_xml_par(const char* par);
145   const char* get_xml_pos(const char* s, const char* attr);
146   std::vector<std::string> get_xml_list(const char* list, const char* tag);
147   int check_xml_par(const char* q, const char* attr, const char* value);
148 private:
149   HunspellImpl(const HunspellImpl&);
150   HunspellImpl& operator=(const HunspellImpl&);
151 };
152 
Hunspell(const char * affpath,const char * dpath,const char * key)153 Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
154   : m_Impl(new HunspellImpl(affpath, dpath, key)) {
155 }
156 
HunspellImpl(const char * affpath,const char * dpath,const char * key)157 HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) {
158   csconv = NULL;
159   utf8 = 0;
160   complexprefixes = 0;
161   affixpath = mystrdup(affpath);
162 
163   /* first set up the hash manager */
164   m_HMgrs.push_back(new HashMgr(dpath, affpath, key));
165 
166   /* next set up the affix manager */
167   /* it needs access to the hash manager lookup methods */
168   pAMgr = new AffixMgr(affpath, m_HMgrs, key);
169 
170   /* get the preferred try string and the dictionary */
171   /* encoding from the Affix Manager for that dictionary */
172   char* try_string = pAMgr->get_try_string();
173   encoding = pAMgr->get_encoding();
174   langnum = pAMgr->get_langnum();
175   utf8 = pAMgr->get_utf8();
176   if (!utf8)
177     csconv = get_current_cs(encoding);
178   complexprefixes = pAMgr->get_complexprefixes();
179   wordbreak = pAMgr->get_breaktable();
180 
181   dic_encoding_vec.resize(encoding.size()+1);
182   strcpy(&dic_encoding_vec[0], encoding.c_str());
183 
184   /* and finally set up the suggestion manager */
185   pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
186   if (try_string)
187     free(try_string);
188 }
189 
~Hunspell()190 Hunspell::~Hunspell() {
191   delete m_Impl;
192 }
193 
~HunspellImpl()194 HunspellImpl::~HunspellImpl() {
195   delete pSMgr;
196   delete pAMgr;
197   for (size_t i = 0; i < m_HMgrs.size(); ++i)
198     delete m_HMgrs[i];
199   pSMgr = NULL;
200   pAMgr = NULL;
201 #ifdef MOZILLA_CLIENT
202   delete[] csconv;
203 #endif
204   csconv = NULL;
205   if (affixpath)
206     free(affixpath);
207   affixpath = NULL;
208 }
209 
210 // load extra dictionaries
add_dic(const char * dpath,const char * key)211 int Hunspell::add_dic(const char* dpath, const char* key) {
212   return m_Impl->add_dic(dpath, key);
213 }
214 
215 // load extra dictionaries
add_dic(const char * dpath,const char * key)216 int HunspellImpl::add_dic(const char* dpath, const char* key) {
217   if (!affixpath)
218     return 1;
219   m_HMgrs.push_back(new HashMgr(dpath, affixpath, key));
220   return 0;
221 }
222 
223 // make a copy of src at destination while removing all leading
224 // blanks and removing any trailing periods after recording
225 // their presence with the abbreviation flag
226 // also since already going through character by character,
227 // set the capitalization type
228 // return the length of the "cleaned" (and UTF-8 encoded) word
229 
cleanword2(std::string & dest,std::vector<w_char> & dest_utf,const std::string & src,int * pcaptype,size_t * pabbrev)230 size_t HunspellImpl::cleanword2(std::string& dest,
231                          std::vector<w_char>& dest_utf,
232                          const std::string& src,
233                          int* pcaptype,
234                          size_t* pabbrev) {
235   dest.clear();
236   dest_utf.clear();
237 
238   const char* q = src.c_str();
239 
240   // first skip over any leading blanks
241   while (*q == ' ')
242     ++q;
243 
244   // now strip off any trailing periods (recording their presence)
245   *pabbrev = 0;
246   int nl = strlen(q);
247   while ((nl > 0) && (*(q + nl - 1) == '.')) {
248     nl--;
249     (*pabbrev)++;
250   }
251 
252   // if no characters are left it can't be capitalized
253   if (nl <= 0) {
254     *pcaptype = NOCAP;
255     return 0;
256   }
257 
258   dest.append(q, nl);
259   nl = dest.size();
260   if (utf8) {
261     u8_u16(dest_utf, dest);
262     *pcaptype = get_captype_utf8(dest_utf, langnum);
263   } else {
264     *pcaptype = get_captype(dest, csconv);
265   }
266   return nl;
267 }
268 
cleanword(std::string & dest,const std::string & src,int * pcaptype,int * pabbrev)269 void HunspellImpl::cleanword(std::string& dest,
270                         const std::string& src,
271                         int* pcaptype,
272                         int* pabbrev) {
273   dest.clear();
274   const unsigned char* q = (const unsigned char*)src.c_str();
275   int firstcap = 0;
276 
277   // first skip over any leading blanks
278   while (*q == ' ')
279     ++q;
280 
281   // now strip off any trailing periods (recording their presence)
282   *pabbrev = 0;
283   int nl = strlen((const char*)q);
284   while ((nl > 0) && (*(q + nl - 1) == '.')) {
285     nl--;
286     (*pabbrev)++;
287   }
288 
289   // if no characters are left it can't be capitalized
290   if (nl <= 0) {
291     *pcaptype = NOCAP;
292     return;
293   }
294 
295   // now determine the capitalization type of the first nl letters
296   int ncap = 0;
297   int nneutral = 0;
298   int nc = 0;
299 
300   if (!utf8) {
301     while (nl > 0) {
302       nc++;
303       if (csconv[(*q)].ccase)
304         ncap++;
305       if (csconv[(*q)].cupper == csconv[(*q)].clower)
306         nneutral++;
307       dest.push_back(*q++);
308       nl--;
309     }
310     // remember to terminate the destination string
311     firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
312   } else {
313     std::vector<w_char> t;
314     u8_u16(t, src);
315     for (size_t i = 0; i < t.size(); ++i) {
316       unsigned short idx = (t[i].h << 8) + t[i].l;
317       unsigned short low = unicodetolower(idx, langnum);
318       if (idx != low)
319         ncap++;
320       if (unicodetoupper(idx, langnum) == low)
321         nneutral++;
322     }
323     u16_u8(dest, t);
324     if (ncap) {
325       unsigned short idx = (t[0].h << 8) + t[0].l;
326       firstcap = (idx != unicodetolower(idx, langnum));
327     }
328   }
329 
330   // now finally set the captype
331   if (ncap == 0) {
332     *pcaptype = NOCAP;
333   } else if ((ncap == 1) && firstcap) {
334     *pcaptype = INITCAP;
335   } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
336     *pcaptype = ALLCAP;
337   } else if ((ncap > 1) && firstcap) {
338     *pcaptype = HUHINITCAP;
339   } else {
340     *pcaptype = HUHCAP;
341   }
342 }
343 
mkallcap(std::string & u8)344 void HunspellImpl::mkallcap(std::string& u8) {
345   if (utf8) {
346     std::vector<w_char> u16;
347     u8_u16(u16, u8);
348     ::mkallcap_utf(u16, langnum);
349     u16_u8(u8, u16);
350   } else {
351     ::mkallcap(u8, csconv);
352   }
353 }
354 
mkallsmall2(std::string & u8,std::vector<w_char> & u16)355 int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
356   if (utf8) {
357     ::mkallsmall_utf(u16, langnum);
358     u16_u8(u8, u16);
359   } else {
360     ::mkallsmall(u8, csconv);
361   }
362   return u8.size();
363 }
364 
365 // convert UTF-8 sharp S codes to latin 1
sharps_u8_l1(const std::string & source)366 std::string HunspellImpl::sharps_u8_l1(const std::string& source) {
367   std::string dest(source);
368   mystrrep(dest, "\xC3\x9F", "\xDF");
369   return dest;
370 }
371 
372 // recursive search for right ss - sharp s permutations
spellsharps(std::string & base,size_t n_pos,int n,int repnum,int * info,std::string * root)373 hentry* HunspellImpl::spellsharps(std::string& base,
374                               size_t n_pos,
375                               int n,
376                               int repnum,
377                               int* info,
378                               std::string* root) {
379   size_t pos = base.find("ss", n_pos);
380   if (pos != std::string::npos && (n < MAXSHARPS)) {
381     base[pos] = '\xC3';
382     base[pos + 1] = '\x9F';
383     hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root);
384     if (h)
385       return h;
386     base[pos] = 's';
387     base[pos + 1] = 's';
388     h = spellsharps(base, pos + 2, n + 1, repnum, info, root);
389     if (h)
390       return h;
391   } else if (repnum > 0) {
392     if (utf8)
393       return checkword(base, info, root);
394     std::string tmp(sharps_u8_l1(base));
395     return checkword(tmp, info, root);
396   }
397   return NULL;
398 }
399 
is_keepcase(const hentry * rv)400 int HunspellImpl::is_keepcase(const hentry* rv) {
401   return pAMgr && rv->astr && pAMgr->get_keepcase() &&
402          TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
403 }
404 
405 /* insert a word to the beginning of the suggestion array */
insert_sug(std::vector<std::string> & slst,const std::string & word)406 void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {
407   slst.insert(slst.begin(), word);
408 }
409 
spell(const std::string & word,int * info,std::string * root)410 bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
411   return m_Impl->spell(word, info, root);
412 }
413 
spell(const std::string & word,int * info,std::string * root)414 bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) {
415   struct hentry* rv = NULL;
416 
417   int info2 = 0;
418   if (!info)
419     info = &info2;
420   else
421     *info = 0;
422 
423   // Hunspell supports XML input of the simplified API (see manual)
424   if (word == SPELL_XML)
425     return true;
426   if (utf8) {
427     if (word.size() >= MAXWORDUTF8LEN)
428       return false;
429   } else {
430     if (word.size() >= MAXWORDLEN)
431       return false;
432   }
433   int captype = NOCAP;
434   size_t abbv = 0;
435   size_t wl = 0;
436 
437   std::string scw;
438   std::vector<w_char> sunicw;
439 
440   // input conversion
441   RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
442   {
443     std::string wspace;
444 
445     bool convstatus = rl ? rl->conv(word, wspace) : false;
446     if (convstatus)
447       wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
448     else
449       wl = cleanword2(scw, sunicw, word, &captype, &abbv);
450   }
451 
452 #ifdef MOZILLA_CLIENT
453   // accept the abbreviated words without dots
454   // workaround for the incomplete tokenization of Mozilla
455   abbv = 1;
456 #endif
457 
458   if (wl == 0 || m_HMgrs.empty())
459     return true;
460   if (root)
461     root->clear();
462 
463   // allow numbers with dots, dashes and commas (but forbid double separators:
464   // "..", "--" etc.)
465   enum { NBEGIN, NNUM, NSEP };
466   int nstate = NBEGIN;
467   size_t i;
468 
469   for (i = 0; (i < wl); i++) {
470     if ((scw[i] <= '9') && (scw[i] >= '0')) {
471       nstate = NNUM;
472     } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
473       if ((nstate == NSEP) || (i == 0))
474         break;
475       nstate = NSEP;
476     } else
477       break;
478   }
479   if ((i == wl) && (nstate == NNUM))
480     return true;
481 
482   switch (captype) {
483     case HUHCAP:
484     /* FALLTHROUGH */
485     case HUHINITCAP:
486       *info += SPELL_ORIGCAP;
487     /* FALLTHROUGH */
488     case NOCAP:
489       rv = checkword(scw, info, root);
490       if ((abbv) && !(rv)) {
491         std::string u8buffer(scw);
492         u8buffer.push_back('.');
493         rv = checkword(u8buffer, info, root);
494       }
495       break;
496     case ALLCAP: {
497       *info += SPELL_ORIGCAP;
498       rv = checkword(scw, info, root);
499       if (rv)
500         break;
501       if (abbv) {
502         std::string u8buffer(scw);
503         u8buffer.push_back('.');
504         rv = checkword(u8buffer, info, root);
505         if (rv)
506           break;
507       }
508       // Spec. prefix handling for Catalan, French, Italian:
509       // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
510       size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
511       if (apos != std::string::npos) {
512         mkallsmall2(scw, sunicw);
513         //conversion may result in string with different len to pre-mkallsmall2
514         //so re-scan
515         if (apos != std::string::npos && apos < scw.size() - 1) {
516           std::string part1 = scw.substr(0, apos+1);
517           std::string part2 = scw.substr(apos+1);
518           if (utf8) {
519             std::vector<w_char> part1u, part2u;
520             u8_u16(part1u, part1);
521             u8_u16(part2u, part2);
522             mkinitcap2(part2, part2u);
523             scw = part1 + part2;
524             sunicw = part1u;
525             sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
526             rv = checkword(scw, info, root);
527             if (rv)
528               break;
529           } else {
530             mkinitcap2(part2, sunicw);
531             scw = part1 + part2;
532             rv = checkword(scw, info, root);
533             if (rv)
534               break;
535           }
536           mkinitcap2(scw, sunicw);
537           rv = checkword(scw, info, root);
538           if (rv)
539             break;
540         }
541       }
542       if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {
543 
544         mkallsmall2(scw, sunicw);
545         std::string u8buffer(scw);
546         rv = spellsharps(u8buffer, 0, 0, 0, info, root);
547         if (!rv) {
548           mkinitcap2(scw, sunicw);
549           rv = spellsharps(scw, 0, 0, 0, info, root);
550         }
551         if ((abbv) && !(rv)) {
552           u8buffer.push_back('.');
553           rv = spellsharps(u8buffer, 0, 0, 0, info, root);
554           if (!rv) {
555             u8buffer = std::string(scw);
556             u8buffer.push_back('.');
557             rv = spellsharps(u8buffer, 0, 0, 0, info, root);
558           }
559         }
560         if (rv)
561           break;
562       }
563     }
564     case INITCAP: {
565 
566       *info += SPELL_ORIGCAP;
567       mkallsmall2(scw, sunicw);
568       std::string u8buffer(scw);
569       mkinitcap2(scw, sunicw);
570       if (captype == INITCAP)
571         *info += SPELL_INITCAP;
572       rv = checkword(scw, info, root);
573       if (captype == INITCAP)
574         *info -= SPELL_INITCAP;
575       // forbid bad capitalization
576       // (for example, ijs -> Ijs instead of IJs in Dutch)
577       // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
578       if (*info & SPELL_FORBIDDEN) {
579         rv = NULL;
580         break;
581       }
582       if (rv && is_keepcase(rv) && (captype == ALLCAP))
583         rv = NULL;
584       if (rv)
585         break;
586 
587       rv = checkword(u8buffer, info, root);
588       if (abbv && !rv) {
589         u8buffer.push_back('.');
590         rv = checkword(u8buffer, info, root);
591         if (!rv) {
592           u8buffer = scw;
593           u8buffer.push_back('.');
594           if (captype == INITCAP)
595             *info += SPELL_INITCAP;
596           rv = checkword(u8buffer, info, root);
597           if (captype == INITCAP)
598             *info -= SPELL_INITCAP;
599           if (rv && is_keepcase(rv) && (captype == ALLCAP))
600             rv = NULL;
601           break;
602         }
603       }
604       if (rv && is_keepcase(rv) &&
605           ((captype == ALLCAP) ||
606            // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
607            // in INITCAP form, too.
608            !(pAMgr->get_checksharps() &&
609              ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
610               (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
611         rv = NULL;
612       break;
613     }
614   }
615 
616   if (rv) {
617     if (pAMgr && pAMgr->get_warn() && rv->astr &&
618         TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
619       *info += SPELL_WARN;
620       if (pAMgr->get_forbidwarn())
621         return false;
622       return true;
623     }
624     return true;
625   }
626 
627   // recursive breaking at break points
628   if (!wordbreak.empty()) {
629 
630     int nbr = 0;
631     wl = scw.size();
632 
633     // calculate break points for recursion limit
634     for (size_t j = 0; j < wordbreak.size(); ++j) {
635       size_t pos = 0;
636       while ((pos = scw.find(wordbreak[j], pos)) != std::string::npos) {
637         ++nbr;
638         pos += wordbreak[j].size();
639       }
640     }
641     if (nbr >= 10)
642       return false;
643 
644     // check boundary patterns (^begin and end$)
645     for (size_t j = 0; j < wordbreak.size(); ++j) {
646       size_t plen = wordbreak[j].size();
647       if (plen == 1 || plen > wl)
648         continue;
649 
650       if (wordbreak[j][0] == '^' &&
651           scw.compare(0, plen - 1, wordbreak[j], 1, plen -1) == 0 && spell(scw.substr(plen - 1)))
652         return true;
653 
654       if (wordbreak[j][plen - 1] == '$' &&
655           scw.compare(wl - plen + 1, plen - 1, wordbreak[j], 0, plen - 1) == 0) {
656         std::string suffix(scw.substr(wl - plen + 1));
657         scw.resize(wl - plen + 1);
658         if (spell(scw))
659           return true;
660         scw.append(suffix);
661       }
662     }
663 
664     // other patterns
665     for (size_t j = 0; j < wordbreak.size(); ++j) {
666       size_t plen = wordbreak[j].size();
667       size_t found = scw.find(wordbreak[j]);
668       if ((found > 0) && (found < wl - plen)) {
669         if (!spell(scw.substr(found + plen)))
670           continue;
671         std::string suffix(scw.substr(found));
672         scw.resize(found);
673         // examine 2 sides of the break point
674         if (spell(scw))
675           return true;
676         scw.append(suffix);
677 
678         // LANG_hu: spec. dash rule
679         if (langnum == LANG_hu && wordbreak[j] == "-") {
680           suffix = scw.substr(found + 1);
681           scw.resize(found + 1);
682           if (spell(scw))
683             return true;  // check the first part with dash
684           scw.append(suffix);
685         }
686         // end of LANG specific region
687       }
688     }
689   }
690 
691   return false;
692 }
693 
checkword(const std::string & w,int * info,std::string * root)694 struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {
695   bool usebuffer = false;
696   std::string w2;
697   const char* word;
698   int len;
699 
700   const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
701   if (ignoredchars != NULL) {
702     w2.assign(w);
703     if (utf8) {
704       const std::vector<w_char>& ignoredchars_utf16 =
705           pAMgr->get_ignore_utf16();
706       remove_ignored_chars_utf(w2, ignoredchars_utf16);
707     } else {
708       remove_ignored_chars(w2, ignoredchars);
709     }
710     word = w2.c_str();
711     len = w2.size();
712     usebuffer = true;
713   } else {
714     word = w.c_str();
715     len = w.size();
716   }
717 
718   if (!len)
719     return NULL;
720 
721   // word reversing wrapper for complex prefixes
722   if (complexprefixes) {
723     if (!usebuffer) {
724       w2.assign(word);
725       usebuffer = true;
726     }
727     if (utf8)
728       reverseword_utf(w2);
729     else
730       reverseword(w2);
731   }
732 
733   if (usebuffer) {
734     word = w2.c_str();
735   }
736 
737   // look word in hash table
738   struct hentry* he = NULL;
739   for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
740     he = m_HMgrs[i]->lookup(word);
741 
742     // check forbidden and onlyincompound words
743     if ((he) && (he->astr) && (pAMgr) &&
744         TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
745       if (info)
746         *info += SPELL_FORBIDDEN;
747       // LANG_hu section: set dash information for suggestions
748       if (langnum == LANG_hu) {
749         if (pAMgr->get_compoundflag() &&
750             TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
751           if (info)
752             *info += SPELL_COMPOUND;
753         }
754       }
755       return NULL;
756     }
757 
758     // he = next not needaffix, onlyincompound homonym or onlyupcase word
759     while (he && (he->astr) && pAMgr &&
760            ((pAMgr->get_needaffix() &&
761              TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
762             (pAMgr->get_onlyincompound() &&
763              TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
764             (info && (*info & SPELL_INITCAP) &&
765              TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))
766       he = he->next_homonym;
767   }
768 
769   // check with affixes
770   if (!he && pAMgr) {
771     // try stripping off affixes */
772     he = pAMgr->affix_check(word, len, 0);
773 
774     // check compound restriction and onlyupcase
775     if (he && he->astr &&
776         ((pAMgr->get_onlyincompound() &&
777           TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
778          (info && (*info & SPELL_INITCAP) &&
779           TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
780       he = NULL;
781     }
782 
783     if (he) {
784       if ((he->astr) && (pAMgr) &&
785           TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
786         if (info)
787           *info += SPELL_FORBIDDEN;
788         return NULL;
789       }
790       if (root) {
791         root->assign(he->word);
792         if (complexprefixes) {
793           if (utf8)
794             reverseword_utf(*root);
795           else
796             reverseword(*root);
797         }
798       }
799       // try check compound word
800     } else if (pAMgr->get_compound()) {
801       struct hentry* rwords[100];  // buffer for COMPOUND pattern checking
802       he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info);
803       // LANG_hu section: `moving rule' with last dash
804       if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
805         std::string dup(word, len - 1);
806         he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info);
807       }
808       // end of LANG specific region
809       if (he) {
810         if (root) {
811           root->assign(he->word);
812           if (complexprefixes) {
813             if (utf8)
814               reverseword_utf(*root);
815             else
816               reverseword(*root);
817           }
818         }
819         if (info)
820           *info += SPELL_COMPOUND;
821       }
822     }
823   }
824 
825   return he;
826 }
827 
suggest(const std::string & word)828 std::vector<std::string> Hunspell::suggest(const std::string& word) {
829   return m_Impl->suggest(word);
830 }
831 
suggest(const std::string & word)832 std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
833   std::vector<std::string> slst;
834 
835   int onlycmpdsug = 0;
836   if (!pSMgr || m_HMgrs.empty())
837     return slst;
838 
839   // process XML input of the simplified API (see manual)
840   if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
841     return spellml(word);
842   }
843   if (utf8) {
844     if (word.size() >= MAXWORDUTF8LEN)
845       return slst;
846   } else {
847     if (word.size() >= MAXWORDLEN)
848       return slst;
849   }
850   int captype = NOCAP;
851   size_t abbv = 0;
852   size_t wl = 0;
853 
854   std::string scw;
855   std::vector<w_char> sunicw;
856 
857   // input conversion
858   RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
859   {
860     std::string wspace;
861 
862     bool convstatus = rl ? rl->conv(word, wspace) : false;
863     if (convstatus)
864       wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
865     else
866       wl = cleanword2(scw, sunicw, word, &captype, &abbv);
867 
868     if (wl == 0)
869       return slst;
870   }
871 
872   int capwords = 0;
873 
874   // check capitalized form for FORCEUCASE
875   if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
876     int info = SPELL_ORIGCAP;
877     if (checkword(scw, &info, NULL)) {
878       std::string form(scw);
879       mkinitcap(form);
880       slst.push_back(form);
881       return slst;
882     }
883   }
884 
885   switch (captype) {
886     case NOCAP: {
887       pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
888       break;
889     }
890 
891     case INITCAP: {
892       capwords = 1;
893       pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
894       std::string wspace(scw);
895       mkallsmall2(wspace, sunicw);
896       pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
897       break;
898     }
899     case HUHINITCAP:
900       capwords = 1;
901     case HUHCAP: {
902       pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
903       // something.The -> something. The
904       size_t dot_pos = scw.find('.');
905       if (dot_pos != std::string::npos) {
906         std::string postdot = scw.substr(dot_pos + 1);
907         int captype_;
908         if (utf8) {
909           std::vector<w_char> postdotu;
910           u8_u16(postdotu, postdot);
911           captype_ = get_captype_utf8(postdotu, langnum);
912         } else {
913           captype_ = get_captype(postdot, csconv);
914         }
915         if (captype_ == INITCAP) {
916           std::string str(scw);
917           str.insert(dot_pos + 1, 1, ' ');
918           insert_sug(slst, str);
919         }
920       }
921 
922       std::string wspace;
923 
924       if (captype == HUHINITCAP) {
925         // TheOpenOffice.org -> The OpenOffice.org
926         wspace = scw;
927         mkinitsmall2(wspace, sunicw);
928         pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
929       }
930       wspace = scw;
931       mkallsmall2(wspace, sunicw);
932       if (spell(wspace.c_str()))
933         insert_sug(slst, wspace);
934       size_t prevns = slst.size();
935       pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
936       if (captype == HUHINITCAP) {
937         mkinitcap2(wspace, sunicw);
938         if (spell(wspace.c_str()))
939           insert_sug(slst, wspace);
940         pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
941       }
942       // aNew -> "a New" (instead of "a new")
943       for (size_t j = prevns; j < slst.size(); ++j) {
944         const char* space = strchr(slst[j].c_str(), ' ');
945         if (space) {
946           size_t slen = strlen(space + 1);
947           // different case after space (need capitalisation)
948           if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
949             std::string first(slst[j].c_str(), space + 1);
950             std::string second(space + 1);
951             std::vector<w_char> w;
952             if (utf8)
953               u8_u16(w, second);
954             mkinitcap2(second, w);
955             // set as first suggestion
956             slst.erase(slst.begin() + j);
957             slst.insert(slst.begin(), first + second);
958           }
959         }
960       }
961       break;
962     }
963 
964     case ALLCAP: {
965       std::string wspace(scw);
966       mkallsmall2(wspace, sunicw);
967       pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
968       if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))
969         insert_sug(slst, wspace);
970       mkinitcap2(wspace, sunicw);
971       pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
972       for (size_t j = 0; j < slst.size(); ++j) {
973         mkallcap(slst[j]);
974         if (pAMgr && pAMgr->get_checksharps()) {
975           if (utf8) {
976             mystrrep(slst[j], "\xC3\x9F", "SS");
977           } else {
978             mystrrep(slst[j], "\xDF", "SS");
979           }
980         }
981       }
982       break;
983     }
984   }
985 
986   // LANG_hu section: replace '-' with ' ' in Hungarian
987   if (langnum == LANG_hu) {
988     for (size_t j = 0; j < slst.size(); ++j) {
989       size_t pos = slst[j].find('-');
990       if (pos != std::string::npos) {
991         int info;
992         std::string w(slst[j].substr(0, pos));
993         w.append(slst[j].substr(pos + 1));
994         (void)spell(w, &info, NULL);
995         if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
996           slst[j][pos] = ' ';
997         } else
998           slst[j][pos] = '-';
999       }
1000     }
1001   }
1002   // END OF LANG_hu section
1003 
1004   // try ngram approach since found nothing or only compound words
1005   if (pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
1006     switch (captype) {
1007       case NOCAP: {
1008         pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs);
1009         break;
1010       }
1011       case HUHINITCAP:
1012         capwords = 1;
1013       case HUHCAP: {
1014         std::string wspace(scw);
1015         mkallsmall2(wspace, sunicw);
1016         pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
1017         break;
1018       }
1019       case INITCAP: {
1020         capwords = 1;
1021         std::string wspace(scw);
1022         mkallsmall2(wspace, sunicw);
1023         pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
1024         break;
1025       }
1026       case ALLCAP: {
1027         std::string wspace(scw);
1028         mkallsmall2(wspace, sunicw);
1029         size_t oldns = slst.size();
1030         pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs);
1031         for (size_t j = oldns; j < slst.size(); ++j) {
1032           mkallcap(slst[j]);
1033         }
1034         break;
1035       }
1036     }
1037   }
1038 
1039   // try dash suggestion (Afo-American -> Afro-American)
1040   size_t dash_pos = scw.find('-');
1041   if (dash_pos != std::string::npos) {
1042     int nodashsug = 1;
1043     for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {
1044       if (slst[j].find('-') != std::string::npos)
1045         nodashsug = 0;
1046     }
1047 
1048     size_t prev_pos = 0;
1049     bool last = false;
1050 
1051     while (nodashsug && !last) {
1052       if (dash_pos == scw.size())
1053         last = 1;
1054       std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
1055       if (!spell(chunk.c_str())) {
1056         std::vector<std::string> nlst = suggest(chunk.c_str());
1057         for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) {
1058           std::string wspace = scw.substr(0, prev_pos);
1059           wspace.append(*j);
1060           if (!last) {
1061             wspace.append("-");
1062             wspace.append(scw.substr(dash_pos + 1));
1063           }
1064           insert_sug(slst, wspace);
1065         }
1066         nodashsug = 0;
1067       }
1068       if (!last) {
1069         prev_pos = dash_pos + 1;
1070         dash_pos = scw.find('-', prev_pos);
1071       }
1072       if (dash_pos == std::string::npos)
1073         dash_pos = scw.size();
1074     }
1075   }
1076 
1077   // word reversing wrapper for complex prefixes
1078   if (complexprefixes) {
1079     for (size_t j = 0; j < slst.size(); ++j) {
1080       if (utf8)
1081         reverseword_utf(slst[j]);
1082       else
1083         reverseword(slst[j]);
1084     }
1085   }
1086 
1087   // capitalize
1088   if (capwords)
1089     for (size_t j = 0; j < slst.size(); ++j) {
1090       mkinitcap(slst[j]);
1091     }
1092 
1093   // expand suggestions with dot(s)
1094   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
1095     for (size_t j = 0; j < slst.size(); ++j) {
1096       slst[j].append(word.substr(word.size() - abbv));
1097     }
1098   }
1099 
1100   // remove bad capitalized and forbidden forms
1101   if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
1102     switch (captype) {
1103       case INITCAP:
1104       case ALLCAP: {
1105         size_t l = 0;
1106         for (size_t j = 0; j < slst.size(); ++j) {
1107           if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
1108             std::string s;
1109             std::vector<w_char> w;
1110             if (utf8) {
1111               u8_u16(w, slst[j]);
1112             } else {
1113               s = slst[j];
1114             }
1115             mkallsmall2(s, w);
1116             if (spell(s)) {
1117               slst[l] = s;
1118               ++l;
1119             } else {
1120               mkinitcap2(s, w);
1121               if (spell(s)) {
1122                 slst[l] = s;
1123                 ++l;
1124               }
1125             }
1126           } else {
1127             slst[l] = slst[j];
1128             ++l;
1129           }
1130         }
1131         slst.resize(l);
1132       }
1133     }
1134   }
1135 
1136   // remove duplications
1137   size_t l = 0;
1138   for (size_t j = 0; j < slst.size(); ++j) {
1139     slst[l] = slst[j];
1140     for (size_t k = 0; k < l; ++k) {
1141       if (slst[k] == slst[j]) {
1142         --l;
1143         break;
1144       }
1145     }
1146     ++l;
1147   }
1148   slst.resize(l);
1149 
1150   // output conversion
1151   rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1152   for (size_t j = 0; rl && j < slst.size(); ++j) {
1153     std::string wspace;
1154     if (rl->conv(slst[j], wspace)) {
1155       slst[j] = wspace;
1156     }
1157   }
1158 
1159   return slst;
1160 }
1161 
get_dict_encoding() const1162 const std::string& Hunspell::get_dict_encoding() const {
1163   return m_Impl->get_dict_encoding();
1164 }
1165 
get_dict_encoding() const1166 const std::string& HunspellImpl::get_dict_encoding() const {
1167   return encoding;
1168 }
1169 
stem(const std::vector<std::string> & desc)1170 std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
1171   return m_Impl->stem(desc);
1172 }
1173 
stem(const std::vector<std::string> & desc)1174 std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
1175   std::vector<std::string> slst;
1176 
1177   std::string result2;
1178   if (desc.empty())
1179     return slst;
1180   for (size_t i = 0; i < desc.size(); ++i) {
1181 
1182     std::string result;
1183 
1184     // add compound word parts (except the last one)
1185     const char* s = desc[i].c_str();
1186     const char* part = strstr(s, MORPH_PART);
1187     if (part) {
1188       const char* nextpart = strstr(part + 1, MORPH_PART);
1189       while (nextpart) {
1190         std::string field;
1191         copy_field(field, part, MORPH_PART);
1192         result.append(field);
1193         part = nextpart;
1194         nextpart = strstr(part + 1, MORPH_PART);
1195       }
1196       s = part;
1197     }
1198 
1199     std::string tok(s);
1200     size_t alt = 0;
1201     while ((alt = tok.find(" | ", alt)) != std::string::npos) {
1202       tok[alt + 1] = MSEP_ALT;
1203     }
1204     std::vector<std::string> pl = line_tok(tok, MSEP_ALT);
1205     for (size_t k = 0; k < pl.size(); ++k) {
1206       // add derivational suffixes
1207       if (pl[k].find(MORPH_DERI_SFX) != std::string::npos) {
1208         // remove inflectional suffixes
1209         const size_t is = pl[k].find(MORPH_INFL_SFX);
1210         if (is != std::string::npos)
1211           pl[k].resize(is);
1212         std::vector<std::string> singlepl;
1213         singlepl.push_back(pl[k]);
1214         std::string sg = pSMgr->suggest_gen(singlepl, pl[k]);
1215         if (!sg.empty()) {
1216           std::vector<std::string> gen = line_tok(sg, MSEP_REC);
1217           for (size_t j = 0; j < gen.size(); ++j) {
1218             result2.push_back(MSEP_REC);
1219             result2.append(result);
1220             result2.append(gen[j]);
1221           }
1222         }
1223       } else {
1224         result2.push_back(MSEP_REC);
1225         result2.append(result);
1226         if (pl[k].find(MORPH_SURF_PFX) != std::string::npos) {
1227           std::string field;
1228           copy_field(field, pl[k], MORPH_SURF_PFX);
1229           result2.append(field);
1230         }
1231         std::string field;
1232         copy_field(field, pl[k], MORPH_STEM);
1233         result2.append(field);
1234       }
1235     }
1236   }
1237   slst = line_tok(result2, MSEP_REC);
1238   uniqlist(slst);
1239   return slst;
1240 }
1241 
stem(const std::string & word)1242 std::vector<std::string> Hunspell::stem(const std::string& word) {
1243   return m_Impl->stem(word);
1244 }
1245 
stem(const std::string & word)1246 std::vector<std::string> HunspellImpl::stem(const std::string& word) {
1247   return stem(analyze(word));
1248 }
1249 
get_wordchars() const1250 const char* Hunspell::get_wordchars() const {
1251   return m_Impl->get_wordchars().c_str();
1252 }
1253 
get_wordchars_cpp() const1254 const std::string& Hunspell::get_wordchars_cpp() const {
1255   return m_Impl->get_wordchars();
1256 }
1257 
get_wordchars() const1258 const std::string& HunspellImpl::get_wordchars() const {
1259   return pAMgr->get_wordchars();
1260 }
1261 
get_wordchars_utf16() const1262 const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
1263   return m_Impl->get_wordchars_utf16();
1264 }
1265 
get_wordchars_utf16() const1266 const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
1267   return pAMgr->get_wordchars_utf16();
1268 }
1269 
mkinitcap(std::string & u8)1270 void HunspellImpl::mkinitcap(std::string& u8) {
1271   if (utf8) {
1272     std::vector<w_char> u16;
1273     u8_u16(u16, u8);
1274     ::mkinitcap_utf(u16, langnum);
1275     u16_u8(u8, u16);
1276   } else {
1277     ::mkinitcap(u8, csconv);
1278   }
1279 }
1280 
mkinitcap2(std::string & u8,std::vector<w_char> & u16)1281 int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
1282   if (utf8) {
1283     ::mkinitcap_utf(u16, langnum);
1284     u16_u8(u8, u16);
1285   } else {
1286     ::mkinitcap(u8, csconv);
1287   }
1288   return u8.size();
1289 }
1290 
mkinitsmall2(std::string & u8,std::vector<w_char> & u16)1291 int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
1292   if (utf8) {
1293     ::mkinitsmall_utf(u16, langnum);
1294     u16_u8(u8, u16);
1295   } else {
1296     ::mkinitsmall(u8, csconv);
1297   }
1298   return u8.size();
1299 }
1300 
add(const std::string & word)1301 int Hunspell::add(const std::string& word) {
1302   return m_Impl->add(word);
1303 }
1304 
add(const std::string & word)1305 int HunspellImpl::add(const std::string& word) {
1306   if (!m_HMgrs.empty())
1307     return m_HMgrs[0]->add(word);
1308   return 0;
1309 }
1310 
add_with_affix(const std::string & word,const std::string & example)1311 int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
1312   return m_Impl->add_with_affix(word, example);
1313 }
1314 
add_with_affix(const std::string & word,const std::string & example)1315 int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
1316   if (!m_HMgrs.empty())
1317     return m_HMgrs[0]->add_with_affix(word, example);
1318   return 0;
1319 }
1320 
remove(const std::string & word)1321 int Hunspell::remove(const std::string& word) {
1322   return m_Impl->remove(word);
1323 }
1324 
remove(const std::string & word)1325 int HunspellImpl::remove(const std::string& word) {
1326   if (!m_HMgrs.empty())
1327     return m_HMgrs[0]->remove(word);
1328   return 0;
1329 }
1330 
get_version() const1331 const char* Hunspell::get_version() const {
1332   return m_Impl->get_version().c_str();
1333 }
1334 
get_version_cpp() const1335 const std::string& Hunspell::get_version_cpp() const {
1336   return m_Impl->get_version();
1337 }
1338 
get_version() const1339 const std::string& HunspellImpl::get_version() const {
1340   return pAMgr->get_version();
1341 }
1342 
get_csconv()1343 struct cs_info* HunspellImpl::get_csconv() {
1344   return csconv;
1345 }
1346 
get_csconv()1347 struct cs_info* Hunspell::get_csconv() {
1348   return m_Impl->get_csconv();
1349 }
1350 
cat_result(std::string & result,const std::string & st)1351 void HunspellImpl::cat_result(std::string& result, const std::string& st) {
1352   if (!st.empty()) {
1353     if (!result.empty())
1354       result.append("\n");
1355     result.append(st);
1356   }
1357 }
1358 
analyze(const std::string & word)1359 std::vector<std::string> Hunspell::analyze(const std::string& word) {
1360   return m_Impl->analyze(word);
1361 }
1362 
analyze(const std::string & word)1363 std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
1364   std::vector<std::string> slst;
1365   if (!pSMgr || m_HMgrs.empty())
1366     return slst;
1367   if (utf8) {
1368     if (word.size() >= MAXWORDUTF8LEN)
1369       return slst;
1370   } else {
1371     if (word.size() >= MAXWORDLEN)
1372       return slst;
1373   }
1374   int captype = NOCAP;
1375   size_t abbv = 0;
1376   size_t wl = 0;
1377 
1378   std::string scw;
1379   std::vector<w_char> sunicw;
1380 
1381   // input conversion
1382   RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1383   {
1384     std::string wspace;
1385 
1386     bool convstatus = rl ? rl->conv(word, wspace) : false;
1387     if (convstatus)
1388       wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
1389     else
1390       wl = cleanword2(scw, sunicw, word, &captype, &abbv);
1391   }
1392 
1393   if (wl == 0) {
1394     if (abbv) {
1395       scw.clear();
1396       for (wl = 0; wl < abbv; wl++)
1397         scw.push_back('.');
1398       abbv = 0;
1399     } else
1400       return slst;
1401   }
1402 
1403   std::string result;
1404 
1405   size_t n = 0;
1406   // test numbers
1407   // LANG_hu section: set dash information for suggestions
1408   if (langnum == LANG_hu) {
1409     size_t n2 = 0;
1410     size_t n3 = 0;
1411 
1412     while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
1413                         (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
1414       n++;
1415       if ((scw[n] == '.') || (scw[n] == ',')) {
1416         if (((n2 == 0) && (n > 3)) ||
1417             ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
1418           break;
1419         n2++;
1420         n3 = n;
1421       }
1422     }
1423 
1424     if ((n == wl) && (n3 > 0) && (n - n3 > 3))
1425       return slst;
1426     if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
1427                       checkword(scw.substr(n), NULL, NULL))) {
1428       result.append(scw);
1429       result.resize(n - 1);
1430       if (n == wl)
1431         cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));
1432       else {
1433         std::string chunk = scw.substr(n - 1, 1);
1434         cat_result(result, pSMgr->suggest_morph(chunk));
1435         result.push_back('+');  // XXX SPEC. MORPHCODE
1436         cat_result(result, pSMgr->suggest_morph(scw.substr(n)));
1437       }
1438       return line_tok(result, MSEP_REC);
1439     }
1440   }
1441   // END OF LANG_hu section
1442 
1443   switch (captype) {
1444     case HUHCAP:
1445     case HUHINITCAP:
1446     case NOCAP: {
1447       cat_result(result, pSMgr->suggest_morph(scw));
1448       if (abbv) {
1449         std::string u8buffer(scw);
1450         u8buffer.push_back('.');
1451         cat_result(result, pSMgr->suggest_morph(u8buffer));
1452       }
1453       break;
1454     }
1455     case INITCAP: {
1456       mkallsmall2(scw, sunicw);
1457       std::string u8buffer(scw);
1458       mkinitcap2(scw, sunicw);
1459       cat_result(result, pSMgr->suggest_morph(u8buffer));
1460       cat_result(result, pSMgr->suggest_morph(scw));
1461       if (abbv) {
1462         u8buffer.push_back('.');
1463         cat_result(result, pSMgr->suggest_morph(u8buffer));
1464 
1465         u8buffer = scw;
1466         u8buffer.push_back('.');
1467 
1468         cat_result(result, pSMgr->suggest_morph(u8buffer));
1469       }
1470       break;
1471     }
1472     case ALLCAP: {
1473       cat_result(result, pSMgr->suggest_morph(scw));
1474       if (abbv) {
1475         std::string u8buffer(scw);
1476         u8buffer.push_back('.');
1477         cat_result(result, pSMgr->suggest_morph(u8buffer));
1478       }
1479       mkallsmall2(scw, sunicw);
1480       std::string u8buffer(scw);
1481       mkinitcap2(scw, sunicw);
1482 
1483       cat_result(result, pSMgr->suggest_morph(u8buffer));
1484       cat_result(result, pSMgr->suggest_morph(scw));
1485       if (abbv) {
1486         u8buffer.push_back('.');
1487         cat_result(result, pSMgr->suggest_morph(u8buffer));
1488 
1489         u8buffer = scw;
1490         u8buffer.push_back('.');
1491 
1492         cat_result(result, pSMgr->suggest_morph(u8buffer));
1493       }
1494       break;
1495     }
1496   }
1497 
1498   if (!result.empty()) {
1499     // word reversing wrapper for complex prefixes
1500     if (complexprefixes) {
1501       if (utf8)
1502         reverseword_utf(result);
1503       else
1504         reverseword(result);
1505     }
1506     return line_tok(result, MSEP_REC);
1507   }
1508 
1509   // compound word with dash (HU) I18n
1510   // LANG_hu section: set dash information for suggestions
1511 
1512   size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
1513   if (dash_pos != std::string::npos) {
1514     int nresult = 0;
1515 
1516     std::string part1 = scw.substr(0, dash_pos);
1517     std::string part2 = scw.substr(dash_pos+1);
1518 
1519     // examine 2 sides of the dash
1520     if (part2.empty()) {  // base word ending with dash
1521       if (spell(part1)) {
1522         std::string p = pSMgr->suggest_morph(part1);
1523         if (!p.empty()) {
1524           slst = line_tok(p, MSEP_REC);
1525           return slst;
1526         }
1527       }
1528     } else if (part2.size() == 1 && part2[0] == 'e') {  // XXX (HU) -e hat.
1529       if (spell(part1) && (spell("-e"))) {
1530         std::string st = pSMgr->suggest_morph(part1);
1531         if (!st.empty()) {
1532           result.append(st);
1533         }
1534         result.push_back('+');  // XXX spec. separator in MORPHCODE
1535         st = pSMgr->suggest_morph("-e");
1536         if (!st.empty()) {
1537           result.append(st);
1538         }
1539         return line_tok(result, MSEP_REC);
1540       }
1541     } else {
1542       // first word ending with dash: word- XXX ???
1543       part1.push_back(' ');
1544       nresult = spell(part1);
1545       part1.erase(part1.size() - 1);
1546       if (nresult && spell(part2) &&
1547           ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
1548         std::string st = pSMgr->suggest_morph(part1);
1549         if (!st.empty()) {
1550           result.append(st);
1551           result.push_back('+');  // XXX spec. separator in MORPHCODE
1552         }
1553         st = pSMgr->suggest_morph(part2);
1554         if (!st.empty()) {
1555           result.append(st);
1556         }
1557         return line_tok(result, MSEP_REC);
1558       }
1559     }
1560     // affixed number in correct word
1561     if (nresult && (dash_pos > 0) &&
1562         (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
1563          (scw[dash_pos - 1] == '.'))) {
1564       n = 1;
1565       if (scw[dash_pos - n] == '.')
1566         n++;
1567       // search first not a number character to left from dash
1568       while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
1569              (n < 6)) {
1570         n++;
1571       }
1572       if (dash_pos < n)
1573         n--;
1574       // numbers: valami1000000-hoz
1575       // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1576       // 56-hoz, 6-hoz
1577       for (; n >= 1; n--) {
1578         if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
1579             continue;
1580         }
1581         std::string chunk = scw.substr(dash_pos - n);
1582         if (checkword(chunk, NULL, NULL)) {
1583           result.append(chunk);
1584           std::string st = pSMgr->suggest_morph(chunk);
1585           if (!st.empty()) {
1586             result.append(st);
1587           }
1588           return line_tok(result, MSEP_REC);
1589         }
1590       }
1591     }
1592   }
1593   return slst;
1594 }
1595 
generate(const std::string & word,const std::vector<std::string> & pl)1596 std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
1597   return m_Impl->generate(word, pl);
1598 }
1599 
generate(const std::string & word,const std::vector<std::string> & pl)1600 std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
1601   std::vector<std::string> slst;
1602   if (!pSMgr || pl.empty())
1603     return slst;
1604   std::vector<std::string> pl2 = analyze(word);
1605   int captype = NOCAP;
1606   int abbv = 0;
1607   std::string cw;
1608   cleanword(cw, word, &captype, &abbv);
1609   std::string result;
1610 
1611   for (size_t i = 0; i < pl.size(); ++i) {
1612     cat_result(result, pSMgr->suggest_gen(pl2, pl[i]));
1613   }
1614 
1615   if (!result.empty()) {
1616     // allcap
1617     if (captype == ALLCAP)
1618       mkallcap(result);
1619 
1620     // line split
1621     slst = line_tok(result, MSEP_REC);
1622 
1623     // capitalize
1624     if (captype == INITCAP || captype == HUHINITCAP) {
1625       for (size_t j = 0; j < slst.size(); ++j) {
1626         mkinitcap(slst[j]);
1627       }
1628     }
1629 
1630     // temporary filtering of prefix related errors (eg.
1631     // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1632     std::vector<std::string>::iterator it = slst.begin();
1633     while (it != slst.end()) {
1634       if (!spell(*it)) {
1635         it = slst.erase(it);
1636       } else  {
1637         ++it;
1638       }
1639     }
1640   }
1641   return slst;
1642 }
1643 
generate(const std::string & word,const std::string & pattern)1644 std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
1645   return m_Impl->generate(word, pattern);
1646 }
1647 
generate(const std::string & word,const std::string & pattern)1648 std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
1649   std::vector<std::string> pl = analyze(pattern);
1650   std::vector<std::string> slst = generate(word, pl);
1651   uniqlist(slst);
1652   return slst;
1653 }
1654 
1655 // minimal XML parser functions
get_xml_par(const char * par)1656 std::string HunspellImpl::get_xml_par(const char* par) {
1657   std::string dest;
1658   if (!par)
1659     return dest;
1660   char end = *par;
1661   if (end == '>')
1662     end = '<';
1663   else if (end != '\'' && end != '"')
1664     return dest;  // bad XML
1665   for (par++; *par != '\0' && *par != end; ++par) {
1666     dest.push_back(*par);
1667   }
1668   mystrrep(dest, "&lt;", "<");
1669   mystrrep(dest, "&amp;", "&");
1670   return dest;
1671 }
1672 
get_langnum() const1673 int Hunspell::get_langnum() const {
1674   return m_Impl->get_langnum();
1675 }
1676 
get_langnum() const1677 int HunspellImpl::get_langnum() const {
1678   return langnum;
1679 }
1680 
input_conv(const std::string & word,std::string & dest)1681 bool Hunspell::input_conv(const std::string& word, std::string& dest) {
1682   return m_Impl->input_conv(word, dest);
1683 }
1684 
input_conv(const char * word,char * dest,size_t destsize)1685 int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
1686   std::string d;
1687   bool ret = input_conv(word, d);
1688   if (ret && d.size() < destsize) {
1689     strncpy(dest, d.c_str(), destsize);
1690     return 1;
1691   }
1692   return 0;
1693 }
1694 
input_conv(const std::string & word,std::string & dest)1695 bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
1696   RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
1697   if (rl) {
1698     return rl->conv(word, dest);
1699   }
1700   dest.assign(word);
1701   return false;
1702 }
1703 
1704 // return the beginning of the element (attr == NULL) or the attribute
get_xml_pos(const char * s,const char * attr)1705 const char* HunspellImpl::get_xml_pos(const char* s, const char* attr) {
1706   const char* end = strchr(s, '>');
1707   if (attr == NULL)
1708     return end;
1709   const char* p = s;
1710   while (1) {
1711     p = strstr(p, attr);
1712     if (!p || p >= end)
1713       return 0;
1714     if (*(p - 1) == ' ' || *(p - 1) == '\n')
1715       break;
1716     p += strlen(attr);
1717   }
1718   return p + strlen(attr);
1719 }
1720 
check_xml_par(const char * q,const char * attr,const char * value)1721 int HunspellImpl::check_xml_par(const char* q,
1722                             const char* attr,
1723                             const char* value) {
1724   std::string cw = get_xml_par(get_xml_pos(q, attr));
1725   if (cw == value)
1726     return 1;
1727   return 0;
1728 }
1729 
get_xml_list(const char * list,const char * tag)1730 std::vector<std::string> HunspellImpl::get_xml_list(const char* list, const char* tag) {
1731   std::vector<std::string> slst;
1732   if (!list)
1733     return slst;
1734   const char* p = list;
1735   for (size_t n = 0; ((p = strstr(p, tag)) != NULL); ++p, ++n) {
1736     std::string cw = get_xml_par(p + strlen(tag) - 1);
1737     if (cw.empty()) {
1738       break;
1739     }
1740     slst.push_back(cw);
1741   }
1742   return slst;
1743 }
1744 
spellml(const std::string & in_word)1745 std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
1746   std::vector<std::string> slst;
1747 
1748   const char* word = in_word.c_str();
1749 
1750   const char* q = strstr(word, "<query");
1751   if (!q)
1752     return slst;  // bad XML input
1753   const char* q2 = strchr(q, '>');
1754   if (!q2)
1755     return slst;  // bad XML input
1756   q2 = strstr(q2, "<word");
1757   if (!q2)
1758     return slst;  // bad XML input
1759   if (check_xml_par(q, "type=", "analyze")) {
1760     std::string cw = get_xml_par(strchr(q2, '>'));
1761     if (!cw.empty())
1762       slst = analyze(cw);
1763     if (slst.empty())
1764       return slst;
1765     // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1766     std::string r;
1767     r.append("<code>");
1768     for (size_t i = 0; i < slst.size(); ++i) {
1769       r.append("<a>");
1770 
1771       std::string entry(slst[i]);
1772       mystrrep(entry, "\t", " ");
1773       mystrrep(entry, "&", "&amp;");
1774       mystrrep(entry, "<", "&lt;");
1775       r.append(entry);
1776 
1777       r.append("</a>");
1778     }
1779     r.append("</code>");
1780     slst.clear();
1781     slst.push_back(r);
1782     return slst;
1783   } else if (check_xml_par(q, "type=", "stem")) {
1784     std::string cw = get_xml_par(strchr(q2, '>'));
1785     if (!cw.empty())
1786       return stem(cw);
1787   } else if (check_xml_par(q, "type=", "generate")) {
1788     std::string cw = get_xml_par(strchr(q2, '>'));
1789     if (cw.empty())
1790       return slst;
1791     const char* q3 = strstr(q2 + 1, "<word");
1792     if (q3) {
1793       std::string cw2 = get_xml_par(strchr(q3, '>'));
1794       if (!cw2.empty()) {
1795         return generate(cw, cw2);
1796       }
1797     } else {
1798       if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
1799         std::vector<std::string> slst2 = get_xml_list(strchr(q2, '>'), "<a>");
1800         if (!slst2.empty()) {
1801           slst = generate(cw, slst2);
1802           uniqlist(slst);
1803           return slst;
1804         }
1805       }
1806     }
1807   }
1808   return slst;
1809 }
1810 
spell(const char * word,int * info,char ** root)1811 int Hunspell::spell(const char* word, int* info, char** root) {
1812   std::string sroot;
1813   bool ret = m_Impl->spell(word, info, root ? &sroot : NULL);
1814   if (root) {
1815     if (sroot.empty()) {
1816       *root = NULL;
1817     } else {
1818       *root = mystrdup(sroot.c_str());
1819     }
1820   }
1821   return ret;
1822 }
1823 
1824 namespace {
munge_vector(char *** slst,const std::vector<std::string> & items)1825   int munge_vector(char*** slst, const std::vector<std::string>& items) {
1826     if (items.empty()) {
1827       *slst = NULL;
1828       return 0;
1829     } else {
1830       *slst = (char**)malloc(sizeof(char*) * items.size());
1831       if (!*slst)
1832         return 0;
1833       for (size_t i = 0; i < items.size(); ++i)
1834         (*slst)[i] = mystrdup(items[i].c_str());
1835     }
1836     return items.size();
1837   }
1838 }
1839 
free_list(char *** slst,int n)1840 void Hunspell::free_list(char*** slst, int n) {
1841   Hunspell_free_list((Hunhandle*)(this), slst, n);
1842 }
1843 
suggest(char *** slst,const char * word)1844 int Hunspell::suggest(char*** slst, const char* word) {
1845   return Hunspell_suggest((Hunhandle*)(this), slst, word);
1846 }
1847 
suffix_suggest(char *** slst,const char * root_word)1848 int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
1849   std::vector<std::string> stems = m_Impl->suffix_suggest(root_word);
1850   return munge_vector(slst, stems);
1851 }
1852 
get_dic_encoding()1853 char* Hunspell::get_dic_encoding() {
1854   return &(m_Impl->dic_encoding_vec[0]);
1855 }
1856 
stem(char *** slst,char ** desc,int n)1857 int Hunspell::stem(char*** slst, char** desc, int n) {
1858   return Hunspell_stem2((Hunhandle*)(this), slst, desc, n);
1859 }
1860 
stem(char *** slst,const char * word)1861 int Hunspell::stem(char*** slst, const char* word) {
1862   return Hunspell_stem((Hunhandle*)(this), slst, word);
1863 }
1864 
analyze(char *** slst,const char * word)1865 int Hunspell::analyze(char*** slst, const char* word) {
1866   return Hunspell_analyze((Hunhandle*)(this), slst, word);
1867 }
1868 
generate(char *** slst,const char * word,char ** pl,int pln)1869 int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
1870   return Hunspell_generate2((Hunhandle*)(this), slst, word, pl, pln);
1871 }
1872 
generate(char *** slst,const char * word,const char * pattern)1873 int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
1874   return Hunspell_generate((Hunhandle*)(this), slst, word, pattern);
1875 }
1876 
Hunspell_create(const char * affpath,const char * dpath)1877 Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
1878   return (Hunhandle*)(new Hunspell(affpath, dpath));
1879 }
1880 
Hunspell_create_key(const char * affpath,const char * dpath,const char * key)1881 Hunhandle* Hunspell_create_key(const char* affpath,
1882                                const char* dpath,
1883                                const char* key) {
1884   return reinterpret_cast<Hunhandle*>(new Hunspell(affpath, dpath, key));
1885 }
1886 
Hunspell_destroy(Hunhandle * pHunspell)1887 void Hunspell_destroy(Hunhandle* pHunspell) {
1888   delete reinterpret_cast<Hunspell*>(pHunspell);
1889 }
1890 
Hunspell_add_dic(Hunhandle * pHunspell,const char * dpath)1891 int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
1892   return reinterpret_cast<Hunspell*>(pHunspell)->add_dic(dpath);
1893 }
1894 
Hunspell_spell(Hunhandle * pHunspell,const char * word)1895 int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
1896   return reinterpret_cast<Hunspell*>(pHunspell)->spell(std::string(word));
1897 }
1898 
Hunspell_get_dic_encoding(Hunhandle * pHunspell)1899 char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
1900   return reinterpret_cast<Hunspell*>(pHunspell)->get_dic_encoding();
1901 }
1902 
Hunspell_suggest(Hunhandle * pHunspell,char *** slst,const char * word)1903 int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
1904   std::vector<std::string> suggests = reinterpret_cast<Hunspell*>(pHunspell)->suggest(word);
1905   return munge_vector(slst, suggests);
1906 }
1907 
Hunspell_analyze(Hunhandle * pHunspell,char *** slst,const char * word)1908 int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
1909   std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->analyze(word);
1910   return munge_vector(slst, stems);
1911 }
1912 
Hunspell_stem(Hunhandle * pHunspell,char *** slst,const char * word)1913 int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
1914 
1915   std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(word);
1916   return munge_vector(slst, stems);
1917 }
1918 
Hunspell_stem2(Hunhandle * pHunspell,char *** slst,char ** desc,int n)1919 int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
1920   std::vector<std::string> morph;
1921   for (int i = 0; i < n; ++i)
1922     morph.push_back(desc[i]);
1923 
1924   std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->stem(morph);
1925   return munge_vector(slst, stems);
1926 }
1927 
Hunspell_generate(Hunhandle * pHunspell,char *** slst,const char * word,const char * pattern)1928 int Hunspell_generate(Hunhandle* pHunspell,
1929                       char*** slst,
1930                       const char* word,
1931                       const char* pattern) {
1932   std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, pattern);
1933   return munge_vector(slst, stems);
1934 }
1935 
Hunspell_generate2(Hunhandle * pHunspell,char *** slst,const char * word,char ** desc,int n)1936 int Hunspell_generate2(Hunhandle* pHunspell,
1937                        char*** slst,
1938                        const char* word,
1939                        char** desc,
1940                        int n) {
1941   std::vector<std::string> morph;
1942   for (int i = 0; i < n; ++i)
1943     morph.push_back(desc[i]);
1944 
1945   std::vector<std::string> stems = reinterpret_cast<Hunspell*>(pHunspell)->generate(word, morph);
1946   return munge_vector(slst, stems);
1947 }
1948 
1949 /* functions for run-time modification of the dictionary */
1950 
1951 /* add word to the run-time dictionary */
1952 
Hunspell_add(Hunhandle * pHunspell,const char * word)1953 int Hunspell_add(Hunhandle* pHunspell, const char* word) {
1954   return reinterpret_cast<Hunspell*>(pHunspell)->add(word);
1955 }
1956 
1957 /* add word to the run-time dictionary with affix flags of
1958  * the example (a dictionary word): Hunspell will recognize
1959  * affixed forms of the new word, too.
1960  */
1961 
Hunspell_add_with_affix(Hunhandle * pHunspell,const char * word,const char * example)1962 int Hunspell_add_with_affix(Hunhandle* pHunspell,
1963                             const char* word,
1964                             const char* example) {
1965   return reinterpret_cast<Hunspell*>(pHunspell)->add_with_affix(word, example);
1966 }
1967 
1968 /* remove word from the run-time dictionary */
1969 
Hunspell_remove(Hunhandle * pHunspell,const char * word)1970 int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
1971   return reinterpret_cast<Hunspell*>(pHunspell)->remove(word);
1972 }
1973 
Hunspell_free_list(Hunhandle *,char *** list,int n)1974 void Hunspell_free_list(Hunhandle*, char*** list, int n) {
1975   if (list && *list) {
1976     for (int i = 0; i < n; i++)
1977       free((*list)[i]);
1978     free(*list);
1979     *list = NULL;
1980   }
1981 }
1982 
suffix_suggest(const std::string & root_word)1983 std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
1984   return m_Impl->suffix_suggest(root_word);
1985 }
1986 
suffix_suggest(const std::string & root_word)1987 std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
1988   std::vector<std::string> slst;
1989   struct hentry* he = NULL;
1990   int len;
1991   std::string w2;
1992   const char* word;
1993   const char* ignoredchars = pAMgr->get_ignore();
1994   if (ignoredchars != NULL) {
1995     w2.assign(root_word);
1996     if (utf8) {
1997       const std::vector<w_char>& ignoredchars_utf16 =
1998           pAMgr->get_ignore_utf16();
1999       remove_ignored_chars_utf(w2, ignoredchars_utf16);
2000     } else {
2001       remove_ignored_chars(w2, ignoredchars);
2002     }
2003     word = w2.c_str();
2004   } else
2005     word = root_word.c_str();
2006 
2007   len = strlen(word);
2008 
2009   if (!len)
2010     return slst;
2011 
2012   for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
2013     he = m_HMgrs[i]->lookup(word);
2014   }
2015   if (he) {
2016     slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str());
2017   }
2018   return slst;
2019 }
2020