1 /* ***** BEGIN LICENSE BLOCK *****
2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3  *
4  * Copyright (C) 2002-2017 Németh László
5  *
6  * The contents of this file are subject to the Mozilla Public License Version
7  * 1.1 (the "License"); you may not use this file except in compliance with
8  * the License. You may obtain a copy of the License at
9  * http://www.mozilla.org/MPL/
10  *
11  * Software distributed under the License is distributed on an "AS IS" basis,
12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13  * for the specific language governing rights and limitations under the
14  * License.
15  *
16  * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17  *
18  * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19  * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20  * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21  * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22  * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23  *
24  * Alternatively, the contents of this file may be used under the terms of
25  * either the GNU General Public License Version 2 or later (the "GPL"), or
26  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27  * in which case the provisions of the GPL or the LGPL are applicable instead
28  * of those above. If you wish to allow use of your version of this file only
29  * under the terms of either the GPL or the LGPL, and not to allow others to
30  * use your version of this file under the terms of the MPL, indicate your
31  * decision by deleting the provisions above and replace them with the notice
32  * and other provisions required by the GPL or the LGPL. If you do not delete
33  * the provisions above, a recipient may use your version of this file under
34  * the terms of any one of the MPL, the GPL or the LGPL.
35  *
36  * ***** END LICENSE BLOCK ***** */
37 /*
38  * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39  * And Contributors.  All rights reserved.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  *
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  *
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  *
52  * 3. All modifications to the source code must be clearly marked as
53  *    such.  Binary redistributions based on modified source code
54  *    must be clearly marked as modified versions in the documentation
55  *    and/or other materials provided with the distribution.
56  *
57  * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
61  * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68  * SUCH DAMAGE.
69  */
70 
71 #include <stdlib.h>
72 #include <string.h>
73 #include <stdio.h>
74 #include <time.h>
75 
76 #include "affixmgr.hxx"
77 #include "hunspell.hxx"
78 #include "suggestmgr.hxx"
79 #include "hunspell.h"
80 #include "csutil.hxx"
81 
82 #include <limits>
83 #include <string>
84 
85 #define MAXWORDUTF8LEN (MAXWORDLEN * 3)
86 
87 class HunspellImpl
88 {
89 public:
90   HunspellImpl(const char* affpath, const char* dpath, const char* key = NULL);
91   ~HunspellImpl();
92   int add_dic(const char* dpath, const char* key = NULL);
93   std::vector<std::string> suffix_suggest(const std::string& root_word);
94   std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
95   std::vector<std::string> generate(const std::string& word, const std::string& pattern);
96   std::vector<std::string> stem(const std::string& word);
97   std::vector<std::string> stem(const std::vector<std::string>& morph);
98   std::vector<std::string> analyze(const std::string& word);
99   int get_langnum() const;
100   bool input_conv(const std::string& word, std::string& dest);
101   bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
102   std::vector<std::string> suggest(const std::string& word);
103   const std::string& get_wordchars_cpp() const;
104   const std::vector<w_char>& get_wordchars_utf16() const;
105   const std::string& get_dict_encoding() const;
106   int add(const std::string& word);
107   int add_with_affix(const std::string& word, const std::string& example);
108   int remove(const std::string& word);
109   const std::string& get_version_cpp() const;
110   struct cs_info* get_csconv();
111 
112   int spell(const char* word, int* info = NULL, char** root = NULL);
113   int suggest(char*** slst, const char* word);
114   int suffix_suggest(char*** slst, const char* root_word);
115   void free_list(char*** slst, int n);
116   char* get_dic_encoding();
117   int analyze(char*** slst, const char* word);
118   int stem(char*** slst, const char* word);
119   int stem(char*** slst, char** morph, int n);
120   int generate(char*** slst, const char* word, const char* word2);
121   int generate(char*** slst, const char* word, char** desc, int n);
122   const char* get_wordchars() const;
123   const char* get_version() const;
124   int input_conv(const char* word, char* dest, size_t destsize);
125 
126 private:
127   AffixMgr* pAMgr;
128   std::vector<HashMgr*> m_HMgrs;
129   SuggestMgr* pSMgr;
130   char* affixpath;
131   std::string encoding;
132   struct cs_info* csconv;
133   int langnum;
134   int utf8;
135   int complexprefixes;
136   std::vector<std::string> wordbreak;
137 
138 private:
139   std::vector<std::string> analyze_internal(const std::string& word);
140   bool spell_internal(const std::string& word, int* info = NULL, std::string* root = NULL);
141   std::vector<std::string> suggest_internal(const std::string& word,
142                     bool& capitalized, size_t& abbreviated, int& captype);
143   void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
144   size_t cleanword2(std::string& dest,
145                     std::vector<w_char>& dest_u,
146                     const std::string& src,
147                     int* pcaptype,
148                     size_t* pabbrev);
149   void clean_ignore(std::string& dest, const std::string& src);
150   void mkinitcap(std::string& u8);
151   int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
152   int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
153   void mkallcap(std::string& u8);
154   int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
155   struct hentry* checkword(const std::string& source, int* info, std::string* root);
156   std::string sharps_u8_l1(const std::string& source);
157   hentry*
158   spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root);
159   int is_keepcase(const hentry* rv);
160   void insert_sug(std::vector<std::string>& slst, const std::string& word);
161   void cat_result(std::string& result, const std::string& st);
162   std::vector<std::string> spellml(const std::string& word);
163   std::string get_xml_par(const char* par);
164   const char* get_xml_pos(const char* s, const char* attr);
165   std::vector<std::string> get_xml_list(const char* list, const char* tag);
166   int check_xml_par(const char* q, const char* attr, const char* value);
167 private:
168   HunspellImpl(const HunspellImpl&);
169   HunspellImpl& operator=(const HunspellImpl&);
170 };
171 
HunspellImpl(const char * affpath,const char * dpath,const char * key)172 HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) {
173   csconv = NULL;
174   utf8 = 0;
175   complexprefixes = 0;
176   affixpath = mystrdup(affpath);
177 
178   /* first set up the hash manager */
179   m_HMgrs.push_back(new HashMgr(dpath, affpath, key));
180 
181   /* next set up the affix manager */
182   /* it needs access to the hash manager lookup methods */
183   pAMgr = new AffixMgr(affpath, m_HMgrs, key);
184 
185   /* get the preferred try string and the dictionary */
186   /* encoding from the Affix Manager for that dictionary */
187   char* try_string = pAMgr->get_try_string();
188   encoding = pAMgr->get_encoding();
189   langnum = pAMgr->get_langnum();
190   utf8 = pAMgr->get_utf8();
191   if (!utf8)
192     csconv = get_current_cs(encoding);
193   complexprefixes = pAMgr->get_complexprefixes();
194   wordbreak = pAMgr->get_breaktable();
195 
196   /* and finally set up the suggestion manager */
197   pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
198   if (try_string)
199     free(try_string);
200 }
201 
~HunspellImpl()202 HunspellImpl::~HunspellImpl() {
203   delete pSMgr;
204   delete pAMgr;
205   for (size_t i = 0; i < m_HMgrs.size(); ++i)
206     delete m_HMgrs[i];
207   pSMgr = NULL;
208   pAMgr = NULL;
209 #ifdef MOZILLA_CLIENT
210   delete[] csconv;
211 #endif
212   csconv = NULL;
213   if (affixpath)
214     free(affixpath);
215   affixpath = NULL;
216 }
217 
218 // load extra dictionaries
add_dic(const char * dpath,const char * key)219 int HunspellImpl::add_dic(const char* dpath, const char* key) {
220   if (!affixpath)
221     return 1;
222   m_HMgrs.push_back(new HashMgr(dpath, affixpath, key));
223   return 0;
224 }
225 
226 
227 // make a copy of src at dest while removing all characters
228 // specified in IGNORE rule
clean_ignore(std::string & dest,const std::string & src)229 void HunspellImpl::clean_ignore(std::string& dest,
230                                 const std::string& src) {
231   dest.clear();
232   dest.assign(src);
233   const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
234   if (ignoredchars != NULL) {
235     if (utf8) {
236       const std::vector<w_char>& ignoredchars_utf16 =
237           pAMgr->get_ignore_utf16();
238       remove_ignored_chars_utf(dest, ignoredchars_utf16);
239     } else {
240       remove_ignored_chars(dest, ignoredchars);
241     }
242   }
243 }
244 
245 
246 // make a copy of src at destination while removing all leading
247 // blanks and removing any trailing periods after recording
248 // their presence with the abbreviation flag
249 // also since already going through character by character,
250 // set the capitalization type
251 // return the length of the "cleaned" (and UTF-8 encoded) word
252 
cleanword2(std::string & dest,std::vector<w_char> & dest_utf,const std::string & src,int * pcaptype,size_t * pabbrev)253 size_t HunspellImpl::cleanword2(std::string& dest,
254                          std::vector<w_char>& dest_utf,
255                          const std::string& src,
256                          int* pcaptype,
257                          size_t* pabbrev) {
258   dest.clear();
259   dest_utf.clear();
260 
261   // remove IGNORE characters from the string
262   std::string w2;
263   clean_ignore(w2, src);
264 
265   const char* q = w2.c_str();
266 
267   // first skip over any leading blanks
268   while (*q == ' ')
269     ++q;
270 
271   // now strip off any trailing periods (recording their presence)
272   *pabbrev = 0;
273   int nl = strlen(q);
274   while ((nl > 0) && (*(q + nl - 1) == '.')) {
275     nl--;
276     (*pabbrev)++;
277   }
278 
279   // if no characters are left it can't be capitalized
280   if (nl <= 0) {
281     *pcaptype = NOCAP;
282     return 0;
283   }
284 
285   dest.append(q, nl);
286   nl = dest.size();
287   if (utf8) {
288     u8_u16(dest_utf, dest);
289     *pcaptype = get_captype_utf8(dest_utf, langnum);
290   } else {
291     *pcaptype = get_captype(dest, csconv);
292   }
293   return nl;
294 }
295 
cleanword(std::string & dest,const std::string & src,int * pcaptype,int * pabbrev)296 void HunspellImpl::cleanword(std::string& dest,
297                         const std::string& src,
298                         int* pcaptype,
299                         int* pabbrev) {
300   dest.clear();
301   const unsigned char* q = (const unsigned char*)src.c_str();
302   int firstcap = 0;
303 
304   // first skip over any leading blanks
305   while (*q == ' ')
306     ++q;
307 
308   // now strip off any trailing periods (recording their presence)
309   *pabbrev = 0;
310   int nl = strlen((const char*)q);
311   while ((nl > 0) && (*(q + nl - 1) == '.')) {
312     nl--;
313     (*pabbrev)++;
314   }
315 
316   // if no characters are left it can't be capitalized
317   if (nl <= 0) {
318     *pcaptype = NOCAP;
319     return;
320   }
321 
322   // now determine the capitalization type of the first nl letters
323   int ncap = 0;
324   int nneutral = 0;
325   int nc = 0;
326 
327   if (!utf8) {
328     while (nl > 0) {
329       nc++;
330       if (csconv[(*q)].ccase)
331         ncap++;
332       if (csconv[(*q)].cupper == csconv[(*q)].clower)
333         nneutral++;
334       dest.push_back(*q++);
335       nl--;
336     }
337     // remember to terminate the destination string
338     firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
339   } else {
340     std::vector<w_char> t;
341     u8_u16(t, src);
342     for (size_t i = 0; i < t.size(); ++i) {
343       unsigned short idx = (t[i].h << 8) + t[i].l;
344       unsigned short low = unicodetolower(idx, langnum);
345       if (idx != low)
346         ncap++;
347       if (unicodetoupper(idx, langnum) == low)
348         nneutral++;
349     }
350     u16_u8(dest, t);
351     if (ncap) {
352       unsigned short idx = (t[0].h << 8) + t[0].l;
353       firstcap = (idx != unicodetolower(idx, langnum));
354     }
355   }
356 
357   // now finally set the captype
358   if (ncap == 0) {
359     *pcaptype = NOCAP;
360   } else if ((ncap == 1) && firstcap) {
361     *pcaptype = INITCAP;
362   } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
363     *pcaptype = ALLCAP;
364   } else if ((ncap > 1) && firstcap) {
365     *pcaptype = HUHINITCAP;
366   } else {
367     *pcaptype = HUHCAP;
368   }
369 }
370 
mkallcap(std::string & u8)371 void HunspellImpl::mkallcap(std::string& u8) {
372   if (utf8) {
373     std::vector<w_char> u16;
374     u8_u16(u16, u8);
375     ::mkallcap_utf(u16, langnum);
376     u16_u8(u8, u16);
377   } else {
378     ::mkallcap(u8, csconv);
379   }
380 }
381 
mkallsmall2(std::string & u8,std::vector<w_char> & u16)382 int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
383   if (utf8) {
384     ::mkallsmall_utf(u16, langnum);
385     u16_u8(u8, u16);
386   } else {
387     ::mkallsmall(u8, csconv);
388   }
389   return u8.size();
390 }
391 
392 // convert UTF-8 sharp S codes to latin 1
sharps_u8_l1(const std::string & source)393 std::string HunspellImpl::sharps_u8_l1(const std::string& source) {
394   std::string dest(source);
395   mystrrep(dest, "\xC3\x9F", "\xDF");
396   return dest;
397 }
398 
399 // recursive search for right ss - sharp s permutations
spellsharps(std::string & base,size_t n_pos,int n,int repnum,int * info,std::string * root)400 hentry* HunspellImpl::spellsharps(std::string& base,
401                               size_t n_pos,
402                               int n,
403                               int repnum,
404                               int* info,
405                               std::string* root) {
406   size_t pos = base.find("ss", n_pos);
407   if (pos != std::string::npos && (n < MAXSHARPS)) {
408     base[pos] = '\xC3';
409     base[pos + 1] = '\x9F';
410     hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root);
411     if (h)
412       return h;
413     base[pos] = 's';
414     base[pos + 1] = 's';
415     h = spellsharps(base, pos + 2, n + 1, repnum, info, root);
416     if (h)
417       return h;
418   } else if (repnum > 0) {
419     if (utf8)
420       return checkword(base, info, root);
421     std::string tmp(sharps_u8_l1(base));
422     return checkword(tmp, info, root);
423   }
424   return NULL;
425 }
426 
is_keepcase(const hentry * rv)427 int HunspellImpl::is_keepcase(const hentry* rv) {
428   return pAMgr && rv->astr && pAMgr->get_keepcase() &&
429          TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
430 }
431 
432 /* insert a word to the beginning of the suggestion array */
insert_sug(std::vector<std::string> & slst,const std::string & word)433 void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {
434   slst.insert(slst.begin(), word);
435 }
436 
spell(const std::string & word,int * info,std::string * root)437 bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) {
438   bool r = spell_internal(word, info, root);
439   if (r && root) {
440     // output conversion
441     RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
442     if (rl) {
443       std::string wspace;
444       if (rl->conv(*root, wspace)) {
445         *root = wspace;
446       }
447     }
448   }
449   return r;
450 }
451 
spell_internal(const std::string & word,int * info,std::string * root)452 bool HunspellImpl::spell_internal(const std::string& word, int* info, std::string* root) {
453   struct hentry* rv = NULL;
454 
455   int info2 = 0;
456   if (!info)
457     info = &info2;
458   else
459     *info = 0;
460 
461   // Hunspell supports XML input of the simplified API (see manual)
462   if (word == SPELL_XML)
463     return true;
464   if (utf8) {
465     if (word.size() >= MAXWORDUTF8LEN)
466       return false;
467   } else {
468     if (word.size() >= MAXWORDLEN)
469       return false;
470   }
471   int captype = NOCAP;
472   size_t abbv = 0;
473   size_t wl = 0;
474 
475   std::string scw;
476   std::vector<w_char> sunicw;
477 
478   // input conversion
479   RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
480   {
481     std::string wspace;
482 
483     bool convstatus = rl ? rl->conv(word, wspace) : false;
484     if (convstatus)
485       wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
486     else
487       wl = cleanword2(scw, sunicw, word, &captype, &abbv);
488   }
489 
490 #ifdef MOZILLA_CLIENT
491   // accept the abbreviated words without dots
492   // workaround for the incomplete tokenization of Mozilla
493   abbv = 1;
494 #endif
495 
496   if (wl == 0 || m_HMgrs.empty())
497     return true;
498   if (root)
499     root->clear();
500 
501   // allow numbers with dots, dashes and commas (but forbid double separators:
502   // "..", "--" etc.)
503   enum { NBEGIN, NNUM, NSEP };
504   int nstate = NBEGIN;
505   size_t i;
506 
507   for (i = 0; (i < wl); i++) {
508     if ((scw[i] <= '9') && (scw[i] >= '0')) {
509       nstate = NNUM;
510     } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
511       if ((nstate == NSEP) || (i == 0))
512         break;
513       nstate = NSEP;
514     } else
515       break;
516   }
517   if ((i == wl) && (nstate == NNUM))
518     return true;
519 
520   switch (captype) {
521     case HUHCAP:
522     /* FALLTHROUGH */
523     case HUHINITCAP:
524       *info += SPELL_ORIGCAP;
525     /* FALLTHROUGH */
526     case NOCAP:
527       rv = checkword(scw, info, root);
528       if ((abbv) && !(rv)) {
529         std::string u8buffer(scw);
530         u8buffer.push_back('.');
531         rv = checkword(u8buffer, info, root);
532       }
533       break;
534     case ALLCAP: {
535       *info += SPELL_ORIGCAP;
536       rv = checkword(scw, info, root);
537       if (rv)
538         break;
539       if (abbv) {
540         std::string u8buffer(scw);
541         u8buffer.push_back('.');
542         rv = checkword(u8buffer, info, root);
543         if (rv)
544           break;
545       }
546       // Spec. prefix handling for Catalan, French, Italian:
547       // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
548       size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
549       if (apos != std::string::npos) {
550         mkallsmall2(scw, sunicw);
551         //conversion may result in string with different len to pre-mkallsmall2
552         //so re-scan
553         if (apos != std::string::npos && apos < scw.size() - 1) {
554           std::string part1 = scw.substr(0, apos+1);
555           std::string part2 = scw.substr(apos+1);
556           if (utf8) {
557             std::vector<w_char> part1u, part2u;
558             u8_u16(part1u, part1);
559             u8_u16(part2u, part2);
560             mkinitcap2(part2, part2u);
561             scw = part1 + part2;
562             sunicw = part1u;
563             sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
564             rv = checkword(scw, info, root);
565             if (rv)
566               break;
567           } else {
568             mkinitcap2(part2, sunicw);
569             scw = part1 + part2;
570             rv = checkword(scw, info, root);
571             if (rv)
572               break;
573           }
574           mkinitcap2(scw, sunicw);
575           rv = checkword(scw, info, root);
576           if (rv)
577             break;
578         }
579       }
580       if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {
581 
582         mkallsmall2(scw, sunicw);
583         std::string u8buffer(scw);
584         rv = spellsharps(u8buffer, 0, 0, 0, info, root);
585         if (!rv) {
586           mkinitcap2(scw, sunicw);
587           rv = spellsharps(scw, 0, 0, 0, info, root);
588         }
589         if ((abbv) && !(rv)) {
590           u8buffer.push_back('.');
591           rv = spellsharps(u8buffer, 0, 0, 0, info, root);
592           if (!rv) {
593             u8buffer = std::string(scw);
594             u8buffer.push_back('.');
595             rv = spellsharps(u8buffer, 0, 0, 0, info, root);
596           }
597         }
598         if (rv)
599           break;
600       }
601     }
602       /* FALLTHROUGH */
603     case INITCAP: {
604       // handle special capitalization of dotted I
605       bool Idot = (utf8 && (unsigned char) scw[0] == 0xc4 && (unsigned char) scw[1] == 0xb0);
606       *info += SPELL_ORIGCAP;
607       if (captype == ALLCAP) {
608           mkallsmall2(scw, sunicw);
609           mkinitcap2(scw, sunicw);
610           if (Idot)
611              scw.replace(0, 1, "\xc4\xb0");
612       }
613       if (captype == INITCAP)
614         *info += SPELL_INITCAP;
615       rv = checkword(scw, info, root);
616       if (captype == INITCAP)
617         *info -= SPELL_INITCAP;
618       // forbid bad capitalization
619       // (for example, ijs -> Ijs instead of IJs in Dutch)
620       // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
621       if (*info & SPELL_FORBIDDEN) {
622         rv = NULL;
623         break;
624       }
625       if (rv && is_keepcase(rv) && (captype == ALLCAP))
626         rv = NULL;
627       if (rv || (Idot && langnum != LANG_az && langnum != LANG_tr && langnum != LANG_crh))
628         break;
629 
630       mkallsmall2(scw, sunicw);
631       std::string u8buffer(scw);
632       mkinitcap2(scw, sunicw);
633 
634       rv = checkword(u8buffer, info, root);
635       if (abbv && !rv) {
636         u8buffer.push_back('.');
637         rv = checkword(u8buffer, info, root);
638         if (!rv) {
639           u8buffer = scw;
640           u8buffer.push_back('.');
641           if (captype == INITCAP)
642             *info += SPELL_INITCAP;
643           rv = checkword(u8buffer, info, root);
644           if (captype == INITCAP)
645             *info -= SPELL_INITCAP;
646           if (rv && is_keepcase(rv) && (captype == ALLCAP))
647             rv = NULL;
648           break;
649         }
650       }
651       if (rv && is_keepcase(rv) &&
652           ((captype == ALLCAP) ||
653            // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
654            // in INITCAP form, too.
655            !(pAMgr->get_checksharps() &&
656              ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
657               (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
658         rv = NULL;
659       break;
660     }
661   }
662 
663   if (rv) {
664     if (pAMgr && pAMgr->get_warn() && rv->astr &&
665         TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
666       *info += SPELL_WARN;
667       if (pAMgr->get_forbidwarn())
668         return false;
669       return true;
670     }
671     return true;
672   }
673 
674   // recursive breaking at break points
675   if (!wordbreak.empty() && !(*info & SPELL_FORBIDDEN)) {
676 
677     int nbr = 0;
678     wl = scw.size();
679 
680     // calculate break points for recursion limit
681     for (size_t j = 0; j < wordbreak.size(); ++j) {
682       size_t pos = 0;
683       while ((pos = scw.find(wordbreak[j], pos)) != std::string::npos) {
684         ++nbr;
685         pos += wordbreak[j].size();
686       }
687     }
688     if (nbr >= 10)
689       return false;
690 
691     // check boundary patterns (^begin and end$)
692     for (size_t j = 0; j < wordbreak.size(); ++j) {
693       size_t plen = wordbreak[j].size();
694       if (plen == 1 || plen > wl)
695         continue;
696 
697       if (wordbreak[j][0] == '^' &&
698           scw.compare(0, plen - 1, wordbreak[j], 1, plen -1) == 0 && spell(scw.substr(plen - 1)))
699         return true;
700 
701       if (wordbreak[j][plen - 1] == '$' &&
702           scw.compare(wl - plen + 1, plen - 1, wordbreak[j], 0, plen - 1) == 0) {
703         std::string suffix(scw.substr(wl - plen + 1));
704         scw.resize(wl - plen + 1);
705         if (spell(scw))
706           return true;
707         scw.append(suffix);
708       }
709     }
710 
711     // other patterns
712     for (size_t j = 0; j < wordbreak.size(); ++j) {
713       size_t plen = wordbreak[j].size();
714       size_t found = scw.find(wordbreak[j]);
715       if ((found > 0) && (found < wl - plen)) {
716         size_t found2 = scw.find(wordbreak[j], found + 1);
717         // try to break at the second occurance
718         // to recognize dictionary words with wordbreak
719         if (found2 > 0 && (found2 < wl - plen))
720             found = found2;
721         if (!spell(scw.substr(found + plen)))
722           continue;
723         std::string suffix(scw.substr(found));
724         scw.resize(found);
725         // examine 2 sides of the break point
726         if (spell(scw))
727           return true;
728         scw.append(suffix);
729 
730         // LANG_hu: spec. dash rule
731         if (langnum == LANG_hu && wordbreak[j] == "-") {
732           suffix = scw.substr(found + 1);
733           scw.resize(found + 1);
734           if (spell(scw))
735             return true;  // check the first part with dash
736           scw.append(suffix);
737         }
738         // end of LANG specific region
739       }
740     }
741 
742     // other patterns (break at first break point)
743     for (size_t j = 0; j < wordbreak.size(); ++j) {
744       size_t plen = wordbreak[j].size();
745       size_t found = scw.find(wordbreak[j]);
746       if ((found > 0) && (found < wl - plen)) {
747         if (!spell(scw.substr(found + plen)))
748           continue;
749         std::string suffix(scw.substr(found));
750         scw.resize(found);
751         // examine 2 sides of the break point
752         if (spell(scw))
753           return true;
754         scw.append(suffix);
755 
756         // LANG_hu: spec. dash rule
757         if (langnum == LANG_hu && wordbreak[j] == "-") {
758           suffix = scw.substr(found + 1);
759           scw.resize(found + 1);
760           if (spell(scw))
761             return true;  // check the first part with dash
762           scw.append(suffix);
763         }
764         // end of LANG specific region
765       }
766     }
767   }
768 
769   return false;
770 }
771 
checkword(const std::string & w,int * info,std::string * root)772 struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {
773   bool usebuffer = false;
774   std::string w2;
775   const char* word;
776   int len;
777 
778   // remove IGNORE characters from the string
779   clean_ignore(w2, w);
780 
781   word = w2.c_str();
782   len = w2.size();
783   usebuffer = true;
784 
785   if (!len)
786     return NULL;
787 
788   // word reversing wrapper for complex prefixes
789   if (complexprefixes) {
790     if (!usebuffer) {
791       w2.assign(word);
792       usebuffer = true;
793     }
794     if (utf8)
795       reverseword_utf(w2);
796     else
797       reverseword(w2);
798   }
799 
800   if (usebuffer) {
801     word = w2.c_str();
802   }
803 
804   // look word in hash table
805   struct hentry* he = NULL;
806   for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
807     he = m_HMgrs[i]->lookup(word);
808 
809     // check forbidden and onlyincompound words
810     if ((he) && (he->astr) && (pAMgr) &&
811         TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
812       if (info)
813         *info += SPELL_FORBIDDEN;
814       // LANG_hu section: set dash information for suggestions
815       if (langnum == LANG_hu) {
816         if (pAMgr->get_compoundflag() &&
817             TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
818           if (info)
819             *info += SPELL_COMPOUND;
820         }
821       }
822       return NULL;
823     }
824 
825     // he = next not needaffix, onlyincompound homonym or onlyupcase word
826     while (he && (he->astr) && pAMgr &&
827            ((pAMgr->get_needaffix() &&
828              TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
829             (pAMgr->get_onlyincompound() &&
830              TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
831             (info && (*info & SPELL_INITCAP) &&
832              TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))
833       he = he->next_homonym;
834   }
835 
836   // check with affixes
837   if (!he && pAMgr) {
838     // try stripping off affixes */
839     he = pAMgr->affix_check(word, len, 0);
840 
841     // check compound restriction and onlyupcase
842     if (he && he->astr &&
843         ((pAMgr->get_onlyincompound() &&
844           TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
845          (info && (*info & SPELL_INITCAP) &&
846           TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
847       he = NULL;
848     }
849 
850     if (he) {
851       if ((he->astr) && (pAMgr) &&
852           TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
853         if (info)
854           *info += SPELL_FORBIDDEN;
855         return NULL;
856       }
857       if (root) {
858         root->assign(he->word);
859         if (complexprefixes) {
860           if (utf8)
861             reverseword_utf(*root);
862           else
863             reverseword(*root);
864         }
865       }
866       // try check compound word
867     } else if (pAMgr->get_compound()) {
868       struct hentry* rwords[100];  // buffer for COMPOUND pattern checking
869       he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info);
870       // LANG_hu section: `moving rule' with last dash
871       if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
872         std::string dup(word, len - 1);
873         he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info);
874       }
875       // end of LANG specific region
876       if (he) {
877         if (root) {
878           root->assign(he->word);
879           if (complexprefixes) {
880             if (utf8)
881               reverseword_utf(*root);
882             else
883               reverseword(*root);
884           }
885         }
886         if (info)
887           *info += SPELL_COMPOUND;
888       }
889     }
890   }
891 
892   return he;
893 }
894 
suggest(const std::string & word)895 std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
896   bool capwords;
897   size_t abbv;
898   int captype;
899   std::vector<std::string> slst = suggest_internal(word, capwords, abbv, captype);
900   // word reversing wrapper for complex prefixes
901   if (complexprefixes) {
902     for (size_t j = 0; j < slst.size(); ++j) {
903       if (utf8)
904         reverseword_utf(slst[j]);
905       else
906         reverseword(slst[j]);
907     }
908   }
909 
910   // capitalize
911   if (capwords)
912     for (size_t j = 0; j < slst.size(); ++j) {
913       mkinitcap(slst[j]);
914     }
915 
916   // expand suggestions with dot(s)
917   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
918     for (size_t j = 0; j < slst.size(); ++j) {
919       slst[j].append(word.substr(word.size() - abbv));
920     }
921   }
922 
923   // remove bad capitalized and forbidden forms
924   if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
925     switch (captype) {
926       case INITCAP:
927       case ALLCAP: {
928         size_t l = 0;
929         for (size_t j = 0; j < slst.size(); ++j) {
930           if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
931             std::string s;
932             std::vector<w_char> w;
933             if (utf8) {
934               u8_u16(w, slst[j]);
935             } else {
936               s = slst[j];
937             }
938             mkallsmall2(s, w);
939             if (spell(s)) {
940               slst[l] = s;
941               ++l;
942             } else {
943               mkinitcap2(s, w);
944               if (spell(s)) {
945                 slst[l] = s;
946                 ++l;
947               }
948             }
949           } else {
950             slst[l] = slst[j];
951             ++l;
952           }
953         }
954         slst.resize(l);
955       }
956     }
957   }
958 
959   // remove duplications
960   size_t l = 0;
961   for (size_t j = 0; j < slst.size(); ++j) {
962     slst[l] = slst[j];
963     for (size_t k = 0; k < l; ++k) {
964       if (slst[k] == slst[j]) {
965         --l;
966         break;
967       }
968     }
969     ++l;
970   }
971   slst.resize(l);
972 
973   // output conversion
974   RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
975   if (rl) {
976     for (size_t i = 0; rl && i < slst.size(); ++i) {
977       std::string wspace;
978       if (rl->conv(slst[i], wspace)) {
979         slst[i] = wspace;
980       }
981     }
982   }
983   return slst;
984 }
985 
suggest_internal(const std::string & word,bool & capwords,size_t & abbv,int & captype)986 std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word,
987         bool& capwords, size_t& abbv, int& captype) {
988   captype = NOCAP;
989   abbv = 0;
990   capwords = false;
991 
992   std::vector<std::string> slst;
993 
994   int onlycmpdsug = 0;
995   if (!pSMgr || m_HMgrs.empty())
996     return slst;
997 
998   // process XML input of the simplified API (see manual)
999   if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
1000     return spellml(word);
1001   }
1002   if (utf8) {
1003     if (word.size() >= MAXWORDUTF8LEN)
1004       return slst;
1005   } else {
1006     if (word.size() >= MAXWORDLEN)
1007       return slst;
1008   }
1009   size_t wl = 0;
1010 
1011   std::string scw;
1012   std::vector<w_char> sunicw;
1013 
1014   // input conversion
1015   RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1016   {
1017     std::string wspace;
1018 
1019     bool convstatus = rl ? rl->conv(word, wspace) : false;
1020     if (convstatus)
1021       wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
1022     else
1023       wl = cleanword2(scw, sunicw, word, &captype, &abbv);
1024 
1025     if (wl == 0)
1026       return slst;
1027   }
1028 
1029   bool good = false;
1030 
1031   clock_t timelimit;
1032   // initialize in every suggestion call
1033   timelimit = clock();
1034 
1035   // check capitalized form for FORCEUCASE
1036   if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
1037     int info = SPELL_ORIGCAP;
1038     if (checkword(scw, &info, NULL)) {
1039       std::string form(scw);
1040       mkinitcap(form);
1041       slst.push_back(form);
1042       return slst;
1043     }
1044   }
1045 
1046   switch (captype) {
1047     case NOCAP: {
1048       good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
1049       if (clock() > timelimit + TIMELIMIT_GLOBAL)
1050           return slst;
1051       if (abbv) {
1052         std::string wspace(scw);
1053         wspace.push_back('.');
1054         good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1055         if (clock() > timelimit + TIMELIMIT_GLOBAL)
1056             return slst;
1057       }
1058       break;
1059     }
1060 
1061     case INITCAP: {
1062       capwords = true;
1063       good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
1064       if (clock() > timelimit + TIMELIMIT_GLOBAL)
1065           return slst;
1066       std::string wspace(scw);
1067       mkallsmall2(wspace, sunicw);
1068       good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1069       if (clock() > timelimit + TIMELIMIT_GLOBAL)
1070           return slst;
1071       break;
1072     }
1073     case HUHINITCAP:
1074       capwords = true;
1075       /* FALLTHROUGH */
1076     case HUHCAP: {
1077       good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
1078       if (clock() > timelimit + TIMELIMIT_GLOBAL)
1079           return slst;
1080       // something.The -> something. The
1081       size_t dot_pos = scw.find('.');
1082       if (dot_pos != std::string::npos) {
1083         std::string postdot = scw.substr(dot_pos + 1);
1084         int captype_;
1085         if (utf8) {
1086           std::vector<w_char> postdotu;
1087           u8_u16(postdotu, postdot);
1088           captype_ = get_captype_utf8(postdotu, langnum);
1089         } else {
1090           captype_ = get_captype(postdot, csconv);
1091         }
1092         if (captype_ == INITCAP) {
1093           std::string str(scw);
1094           str.insert(dot_pos + 1, 1, ' ');
1095           insert_sug(slst, str);
1096         }
1097       }
1098 
1099       std::string wspace;
1100 
1101       if (captype == HUHINITCAP) {
1102         // TheOpenOffice.org -> The OpenOffice.org
1103         wspace = scw;
1104         mkinitsmall2(wspace, sunicw);
1105         good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1106         if (clock() > timelimit + TIMELIMIT_GLOBAL)
1107             return slst;
1108       }
1109       wspace = scw;
1110       mkallsmall2(wspace, sunicw);
1111       if (spell(wspace.c_str()))
1112         insert_sug(slst, wspace);
1113       size_t prevns = slst.size();
1114       good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1115       if (clock() > timelimit + TIMELIMIT_GLOBAL)
1116           return slst;
1117       if (captype == HUHINITCAP) {
1118         mkinitcap2(wspace, sunicw);
1119         if (spell(wspace.c_str()))
1120           insert_sug(slst, wspace);
1121         good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1122         if (clock() > timelimit + TIMELIMIT_GLOBAL)
1123             return slst;
1124       }
1125       // aNew -> "a New" (instead of "a new")
1126       for (size_t j = prevns; j < slst.size(); ++j) {
1127         const char* space = strchr(slst[j].c_str(), ' ');
1128         if (space) {
1129           size_t slen = strlen(space + 1);
1130           // different case after space (need capitalisation)
1131           if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
1132             std::string first(slst[j].c_str(), space + 1);
1133             std::string second(space + 1);
1134             std::vector<w_char> w;
1135             if (utf8)
1136               u8_u16(w, second);
1137             mkinitcap2(second, w);
1138             // set as first suggestion
1139             slst.erase(slst.begin() + j);
1140             slst.insert(slst.begin(), first + second);
1141           }
1142         }
1143       }
1144       break;
1145     }
1146 
1147     case ALLCAP: {
1148       std::string wspace(scw);
1149       mkallsmall2(wspace, sunicw);
1150       good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1151       if (clock() > timelimit + TIMELIMIT_GLOBAL)
1152           return slst;
1153       if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))
1154         insert_sug(slst, wspace);
1155       mkinitcap2(wspace, sunicw);
1156       good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
1157       if (clock() > timelimit + TIMELIMIT_GLOBAL)
1158           return slst;
1159       for (size_t j = 0; j < slst.size(); ++j) {
1160         mkallcap(slst[j]);
1161         if (pAMgr && pAMgr->get_checksharps()) {
1162           if (utf8) {
1163             mystrrep(slst[j], "\xC3\x9F", "SS");
1164           } else {
1165             mystrrep(slst[j], "\xDF", "SS");
1166           }
1167         }
1168       }
1169       break;
1170     }
1171   }
1172 
1173   // LANG_hu section: replace '-' with ' ' in Hungarian
1174   if (langnum == LANG_hu) {
1175     for (size_t j = 0; j < slst.size(); ++j) {
1176       size_t pos = slst[j].find('-');
1177       if (pos != std::string::npos) {
1178         int info;
1179         std::string w(slst[j].substr(0, pos));
1180         w.append(slst[j].substr(pos + 1));
1181         (void)spell(w, &info, NULL);
1182         if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
1183           slst[j][pos] = ' ';
1184         } else
1185           slst[j][pos] = '-';
1186       }
1187     }
1188   }
1189   // END OF LANG_hu section
1190   // try ngram approach since found nothing good suggestion
1191   if (!good && pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
1192     switch (captype) {
1193       case NOCAP: {
1194         pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP);
1195         if (clock() > timelimit + TIMELIMIT_GLOBAL)
1196             return slst;
1197         break;
1198       }
1199       /* FALLTHROUGH */
1200       case HUHINITCAP:
1201         capwords = true;
1202       /* FALLTHROUGH */
1203       case HUHCAP: {
1204         std::string wspace(scw);
1205         mkallsmall2(wspace, sunicw);
1206         pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP);
1207         if (clock() > timelimit + TIMELIMIT_GLOBAL)
1208             return slst;
1209         break;
1210       }
1211       case INITCAP: {
1212         capwords = true;
1213         std::string wspace(scw);
1214         mkallsmall2(wspace, sunicw);
1215         pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP);
1216         if (clock() > timelimit + TIMELIMIT_GLOBAL)
1217             return slst;
1218         break;
1219       }
1220       case ALLCAP: {
1221         std::string wspace(scw);
1222         mkallsmall2(wspace, sunicw);
1223         size_t oldns = slst.size();
1224         pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP);
1225         if (clock() > timelimit + TIMELIMIT_GLOBAL)
1226             return slst;
1227         for (size_t j = oldns; j < slst.size(); ++j) {
1228           mkallcap(slst[j]);
1229         }
1230         break;
1231       }
1232     }
1233   }
1234 
1235   // try dash suggestion (Afo-American -> Afro-American)
1236   // Note: LibreOffice was modified to treat dashes as word
1237   // characters to check "scot-free" etc. word forms, but
1238   // we need to handle suggestions for "Afo-American", etc.,
1239   // while "Afro-American" is missing from the dictionary.
1240   // TODO avoid possible overgeneration
1241   size_t dash_pos = scw.find('-');
1242   if (dash_pos != std::string::npos) {
1243     int nodashsug = 1;
1244     for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {
1245       if (slst[j].find('-') != std::string::npos)
1246         nodashsug = 0;
1247     }
1248 
1249     size_t prev_pos = 0;
1250     bool last = false;
1251 
1252     while (!good && nodashsug && !last) {
1253       if (dash_pos == scw.size())
1254         last = 1;
1255       std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
1256       if (!spell(chunk.c_str())) {
1257         std::vector<std::string> nlst = suggest(chunk.c_str());
1258         if (clock() > timelimit + TIMELIMIT_GLOBAL)
1259             return slst;
1260         for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) {
1261           std::string wspace = scw.substr(0, prev_pos);
1262           wspace.append(*j);
1263           if (!last) {
1264             wspace.append("-");
1265             wspace.append(scw.substr(dash_pos + 1));
1266           }
1267           int info = 0;
1268           if (pAMgr && pAMgr->get_forbiddenword())
1269             checkword(wspace, &info, NULL);
1270           if (!(info & SPELL_FORBIDDEN))
1271             insert_sug(slst, wspace);
1272         }
1273         nodashsug = 0;
1274       }
1275       if (!last) {
1276         prev_pos = dash_pos + 1;
1277         dash_pos = scw.find('-', prev_pos);
1278       }
1279       if (dash_pos == std::string::npos)
1280         dash_pos = scw.size();
1281     }
1282   }
1283   return slst;
1284 }
1285 
get_dict_encoding() const1286 const std::string& HunspellImpl::get_dict_encoding() const {
1287   return encoding;
1288 }
1289 
stem(const std::vector<std::string> & desc)1290 std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
1291   std::vector<std::string> slst;
1292 
1293   std::string result2;
1294   if (desc.empty())
1295     return slst;
1296   for (size_t i = 0; i < desc.size(); ++i) {
1297 
1298     std::string result;
1299 
1300     // add compound word parts (except the last one)
1301     const char* s = desc[i].c_str();
1302     const char* part = strstr(s, MORPH_PART);
1303     if (part) {
1304       const char* nextpart = strstr(part + 1, MORPH_PART);
1305       while (nextpart) {
1306         std::string field;
1307         copy_field(field, part, MORPH_PART);
1308         result.append(field);
1309         part = nextpart;
1310         nextpart = strstr(part + 1, MORPH_PART);
1311       }
1312       s = part;
1313     }
1314 
1315     std::string tok(s);
1316     size_t alt = 0;
1317     while ((alt = tok.find(" | ", alt)) != std::string::npos) {
1318       tok[alt + 1] = MSEP_ALT;
1319     }
1320     std::vector<std::string> pl = line_tok(tok, MSEP_ALT);
1321     for (size_t k = 0; k < pl.size(); ++k) {
1322       // add derivational suffixes
1323       if (pl[k].find(MORPH_DERI_SFX) != std::string::npos) {
1324         // remove inflectional suffixes
1325         const size_t is = pl[k].find(MORPH_INFL_SFX);
1326         if (is != std::string::npos)
1327           pl[k].resize(is);
1328         std::vector<std::string> singlepl;
1329         singlepl.push_back(pl[k]);
1330         std::string sg = pSMgr->suggest_gen(singlepl, pl[k]);
1331         if (!sg.empty()) {
1332           std::vector<std::string> gen = line_tok(sg, MSEP_REC);
1333           for (size_t j = 0; j < gen.size(); ++j) {
1334             result2.push_back(MSEP_REC);
1335             result2.append(result);
1336             result2.append(gen[j]);
1337           }
1338         }
1339       } else {
1340         result2.push_back(MSEP_REC);
1341         result2.append(result);
1342         if (pl[k].find(MORPH_SURF_PFX) != std::string::npos) {
1343           std::string field;
1344           copy_field(field, pl[k], MORPH_SURF_PFX);
1345           result2.append(field);
1346         }
1347         std::string field;
1348         copy_field(field, pl[k], MORPH_STEM);
1349         result2.append(field);
1350       }
1351     }
1352   }
1353   slst = line_tok(result2, MSEP_REC);
1354   uniqlist(slst);
1355   return slst;
1356 }
1357 
stem(const std::string & word)1358 std::vector<std::string> HunspellImpl::stem(const std::string& word) {
1359   return stem(analyze(word));
1360 }
1361 
get_wordchars_cpp() const1362 const std::string& HunspellImpl::get_wordchars_cpp() const {
1363   return pAMgr->get_wordchars();
1364 }
1365 
get_wordchars_utf16() const1366 const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
1367   return pAMgr->get_wordchars_utf16();
1368 }
1369 
mkinitcap(std::string & u8)1370 void HunspellImpl::mkinitcap(std::string& u8) {
1371   if (utf8) {
1372     std::vector<w_char> u16;
1373     u8_u16(u16, u8);
1374     ::mkinitcap_utf(u16, langnum);
1375     u16_u8(u8, u16);
1376   } else {
1377     ::mkinitcap(u8, csconv);
1378   }
1379 }
1380 
mkinitcap2(std::string & u8,std::vector<w_char> & u16)1381 int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
1382   if (utf8) {
1383     ::mkinitcap_utf(u16, langnum);
1384     u16_u8(u8, u16);
1385   } else {
1386     ::mkinitcap(u8, csconv);
1387   }
1388   return u8.size();
1389 }
1390 
mkinitsmall2(std::string & u8,std::vector<w_char> & u16)1391 int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
1392   if (utf8) {
1393     ::mkinitsmall_utf(u16, langnum);
1394     u16_u8(u8, u16);
1395   } else {
1396     ::mkinitsmall(u8, csconv);
1397   }
1398   return u8.size();
1399 }
1400 
add(const std::string & word)1401 int HunspellImpl::add(const std::string& word) {
1402   if (!m_HMgrs.empty())
1403     return m_HMgrs[0]->add(word);
1404   return 0;
1405 }
1406 
add_with_affix(const std::string & word,const std::string & example)1407 int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
1408   if (!m_HMgrs.empty())
1409     return m_HMgrs[0]->add_with_affix(word, example);
1410   return 0;
1411 }
1412 
remove(const std::string & word)1413 int HunspellImpl::remove(const std::string& word) {
1414   if (!m_HMgrs.empty())
1415     return m_HMgrs[0]->remove(word);
1416   return 0;
1417 }
1418 
get_version_cpp() const1419 const std::string& HunspellImpl::get_version_cpp() const {
1420   return pAMgr->get_version();
1421 }
1422 
get_csconv()1423 struct cs_info* HunspellImpl::get_csconv() {
1424   return csconv;
1425 }
1426 
cat_result(std::string & result,const std::string & st)1427 void HunspellImpl::cat_result(std::string& result, const std::string& st) {
1428   if (!st.empty()) {
1429     if (!result.empty())
1430       result.append("\n");
1431     result.append(st);
1432   }
1433 }
1434 
analyze(const std::string & word)1435 std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
1436   std::vector<std::string> slst = analyze_internal(word);
1437   // output conversion
1438   RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1439   if (rl) {
1440     for (size_t i = 0; rl && i < slst.size(); ++i) {
1441       std::string wspace;
1442       if (rl->conv(slst[i], wspace)) {
1443         slst[i] = wspace;
1444       }
1445     }
1446   }
1447   return slst;
1448 }
1449 
analyze_internal(const std::string & word)1450 std::vector<std::string> HunspellImpl::analyze_internal(const std::string& word) {
1451   std::vector<std::string> slst;
1452   if (!pSMgr || m_HMgrs.empty())
1453     return slst;
1454   if (utf8) {
1455     if (word.size() >= MAXWORDUTF8LEN)
1456       return slst;
1457   } else {
1458     if (word.size() >= MAXWORDLEN)
1459       return slst;
1460   }
1461   int captype = NOCAP;
1462   size_t abbv = 0;
1463   size_t wl = 0;
1464 
1465   std::string scw;
1466   std::vector<w_char> sunicw;
1467 
1468   // input conversion
1469   RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1470   {
1471     std::string wspace;
1472 
1473     bool convstatus = rl ? rl->conv(word, wspace) : false;
1474     if (convstatus)
1475       wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
1476     else
1477       wl = cleanword2(scw, sunicw, word, &captype, &abbv);
1478   }
1479 
1480   if (wl == 0) {
1481     if (abbv) {
1482       scw.clear();
1483       for (wl = 0; wl < abbv; wl++)
1484         scw.push_back('.');
1485       abbv = 0;
1486     } else
1487       return slst;
1488   }
1489 
1490   std::string result;
1491 
1492   size_t n = 0;
1493   // test numbers
1494   // LANG_hu section: set dash information for suggestions
1495   if (langnum == LANG_hu) {
1496     size_t n2 = 0;
1497     size_t n3 = 0;
1498 
1499     while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
1500                         (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
1501       n++;
1502       if ((scw[n] == '.') || (scw[n] == ',')) {
1503         if (((n2 == 0) && (n > 3)) ||
1504             ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
1505           break;
1506         n2++;
1507         n3 = n;
1508       }
1509     }
1510 
1511     if ((n == wl) && (n3 > 0) && (n - n3 > 3))
1512       return slst;
1513     if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
1514                       checkword(scw.substr(n), NULL, NULL))) {
1515       result.append(scw);
1516       result.resize(n - 1);
1517       if (n == wl)
1518         cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));
1519       else {
1520         std::string chunk = scw.substr(n - 1, 1);
1521         cat_result(result, pSMgr->suggest_morph(chunk));
1522         result.push_back('+');  // XXX SPEC. MORPHCODE
1523         cat_result(result, pSMgr->suggest_morph(scw.substr(n)));
1524       }
1525       return line_tok(result, MSEP_REC);
1526     }
1527   }
1528   // END OF LANG_hu section
1529 
1530   switch (captype) {
1531     case HUHCAP:
1532     case HUHINITCAP:
1533     case NOCAP: {
1534       cat_result(result, pSMgr->suggest_morph(scw));
1535       if (abbv) {
1536         std::string u8buffer(scw);
1537         u8buffer.push_back('.');
1538         cat_result(result, pSMgr->suggest_morph(u8buffer));
1539       }
1540       break;
1541     }
1542     case INITCAP: {
1543       mkallsmall2(scw, sunicw);
1544       std::string u8buffer(scw);
1545       mkinitcap2(scw, sunicw);
1546       cat_result(result, pSMgr->suggest_morph(u8buffer));
1547       cat_result(result, pSMgr->suggest_morph(scw));
1548       if (abbv) {
1549         u8buffer.push_back('.');
1550         cat_result(result, pSMgr->suggest_morph(u8buffer));
1551 
1552         u8buffer = scw;
1553         u8buffer.push_back('.');
1554 
1555         cat_result(result, pSMgr->suggest_morph(u8buffer));
1556       }
1557       break;
1558     }
1559     case ALLCAP: {
1560       cat_result(result, pSMgr->suggest_morph(scw));
1561       if (abbv) {
1562         std::string u8buffer(scw);
1563         u8buffer.push_back('.');
1564         cat_result(result, pSMgr->suggest_morph(u8buffer));
1565       }
1566       mkallsmall2(scw, sunicw);
1567       std::string u8buffer(scw);
1568       mkinitcap2(scw, sunicw);
1569 
1570       cat_result(result, pSMgr->suggest_morph(u8buffer));
1571       cat_result(result, pSMgr->suggest_morph(scw));
1572       if (abbv) {
1573         u8buffer.push_back('.');
1574         cat_result(result, pSMgr->suggest_morph(u8buffer));
1575 
1576         u8buffer = scw;
1577         u8buffer.push_back('.');
1578 
1579         cat_result(result, pSMgr->suggest_morph(u8buffer));
1580       }
1581       break;
1582     }
1583   }
1584 
1585   if (!result.empty()) {
1586     // word reversing wrapper for complex prefixes
1587     if (complexprefixes) {
1588       if (utf8)
1589         reverseword_utf(result);
1590       else
1591         reverseword(result);
1592     }
1593     return line_tok(result, MSEP_REC);
1594   }
1595 
1596   // compound word with dash (HU) I18n
1597   // LANG_hu section: set dash information for suggestions
1598 
1599   size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
1600   if (dash_pos != std::string::npos) {
1601     int nresult = 0;
1602 
1603     std::string part1 = scw.substr(0, dash_pos);
1604     std::string part2 = scw.substr(dash_pos+1);
1605 
1606     // examine 2 sides of the dash
1607     if (part2.empty()) {  // base word ending with dash
1608       if (spell(part1)) {
1609         std::string p = pSMgr->suggest_morph(part1);
1610         if (!p.empty()) {
1611           slst = line_tok(p, MSEP_REC);
1612           return slst;
1613         }
1614       }
1615     } else if (part2.size() == 1 && part2[0] == 'e') {  // XXX (HU) -e hat.
1616       if (spell(part1) && (spell("-e"))) {
1617         std::string st = pSMgr->suggest_morph(part1);
1618         if (!st.empty()) {
1619           result.append(st);
1620         }
1621         result.push_back('+');  // XXX spec. separator in MORPHCODE
1622         st = pSMgr->suggest_morph("-e");
1623         if (!st.empty()) {
1624           result.append(st);
1625         }
1626         return line_tok(result, MSEP_REC);
1627       }
1628     } else {
1629       // first word ending with dash: word- XXX ???
1630       part1.push_back(' ');
1631       nresult = spell(part1);
1632       part1.erase(part1.size() - 1);
1633       if (nresult && spell(part2) &&
1634           ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
1635         std::string st = pSMgr->suggest_morph(part1);
1636         if (!st.empty()) {
1637           result.append(st);
1638           result.push_back('+');  // XXX spec. separator in MORPHCODE
1639         }
1640         st = pSMgr->suggest_morph(part2);
1641         if (!st.empty()) {
1642           result.append(st);
1643         }
1644         return line_tok(result, MSEP_REC);
1645       }
1646     }
1647     // affixed number in correct word
1648     if (nresult && (dash_pos > 0) &&
1649         (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
1650          (scw[dash_pos - 1] == '.'))) {
1651       n = 1;
1652       if (scw[dash_pos - n] == '.')
1653         n++;
1654       // search first not a number character to left from dash
1655       while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
1656              (n < 6)) {
1657         n++;
1658       }
1659       if (dash_pos < n)
1660         n--;
1661       // numbers: valami1000000-hoz
1662       // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1663       // 56-hoz, 6-hoz
1664       for (; n >= 1; n--) {
1665         if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
1666             continue;
1667         }
1668         std::string chunk = scw.substr(dash_pos - n);
1669         if (checkword(chunk, NULL, NULL)) {
1670           result.append(chunk);
1671           std::string st = pSMgr->suggest_morph(chunk);
1672           if (!st.empty()) {
1673             result.append(st);
1674           }
1675           return line_tok(result, MSEP_REC);
1676         }
1677       }
1678     }
1679   }
1680   return slst;
1681 }
1682 
generate(const std::string & word,const std::vector<std::string> & pl)1683 std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
1684   std::vector<std::string> slst;
1685   if (!pSMgr || pl.empty())
1686     return slst;
1687   std::vector<std::string> pl2 = analyze(word);
1688   int captype = NOCAP;
1689   int abbv = 0;
1690   std::string cw;
1691   cleanword(cw, word, &captype, &abbv);
1692   std::string result;
1693 
1694   for (size_t i = 0; i < pl.size(); ++i) {
1695     cat_result(result, pSMgr->suggest_gen(pl2, pl[i]));
1696   }
1697 
1698   if (!result.empty()) {
1699     // allcap
1700     if (captype == ALLCAP)
1701       mkallcap(result);
1702 
1703     // line split
1704     slst = line_tok(result, MSEP_REC);
1705 
1706     // capitalize
1707     if (captype == INITCAP || captype == HUHINITCAP) {
1708       for (size_t j = 0; j < slst.size(); ++j) {
1709         mkinitcap(slst[j]);
1710       }
1711     }
1712 
1713     // temporary filtering of prefix related errors (eg.
1714     // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1715     std::vector<std::string>::iterator it = slst.begin();
1716     while (it != slst.end()) {
1717       if (!spell(*it)) {
1718         it = slst.erase(it);
1719       } else  {
1720         ++it;
1721       }
1722     }
1723   }
1724   return slst;
1725 }
1726 
generate(const std::string & word,const std::string & pattern)1727 std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
1728   std::vector<std::string> pl = analyze(pattern);
1729   std::vector<std::string> slst = generate(word, pl);
1730   uniqlist(slst);
1731   return slst;
1732 }
1733 
1734 // minimal XML parser functions
get_xml_par(const char * par)1735 std::string HunspellImpl::get_xml_par(const char* par) {
1736   std::string dest;
1737   if (!par)
1738     return dest;
1739   char end = *par;
1740   if (end == '>')
1741     end = '<';
1742   else if (end != '\'' && end != '"')
1743     return dest;  // bad XML
1744   for (par++; *par != '\0' && *par != end; ++par) {
1745     dest.push_back(*par);
1746   }
1747   mystrrep(dest, "&lt;", "<");
1748   mystrrep(dest, "&amp;", "&");
1749   return dest;
1750 }
1751 
get_langnum() const1752 int HunspellImpl::get_langnum() const {
1753   return langnum;
1754 }
1755 
input_conv(const std::string & word,std::string & dest)1756 bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
1757   RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
1758   if (rl) {
1759     return rl->conv(word, dest);
1760   }
1761   dest.assign(word);
1762   return false;
1763 }
1764 
1765 // return the beginning of the element (attr == NULL) or the attribute
get_xml_pos(const char * s,const char * attr)1766 const char* HunspellImpl::get_xml_pos(const char* s, const char* attr) {
1767   const char* end = strchr(s, '>');
1768   if (attr == NULL)
1769     return end;
1770   const char* p = s;
1771   while (1) {
1772     p = strstr(p, attr);
1773     if (!p || p >= end)
1774       return 0;
1775     if (*(p - 1) == ' ' || *(p - 1) == '\n')
1776       break;
1777     p += strlen(attr);
1778   }
1779   return p + strlen(attr);
1780 }
1781 
check_xml_par(const char * q,const char * attr,const char * value)1782 int HunspellImpl::check_xml_par(const char* q,
1783                             const char* attr,
1784                             const char* value) {
1785   std::string cw = get_xml_par(get_xml_pos(q, attr));
1786   if (cw == value)
1787     return 1;
1788   return 0;
1789 }
1790 
get_xml_list(const char * list,const char * tag)1791 std::vector<std::string> HunspellImpl::get_xml_list(const char* list, const char* tag) {
1792   std::vector<std::string> slst;
1793   if (!list)
1794     return slst;
1795   const char* p = list;
1796   for (size_t n = 0; ((p = strstr(p, tag)) != NULL); ++p, ++n) {
1797     std::string cw = get_xml_par(p + strlen(tag) - 1);
1798     if (cw.empty()) {
1799       break;
1800     }
1801     slst.push_back(cw);
1802   }
1803   return slst;
1804 }
1805 
spellml(const std::string & in_word)1806 std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
1807   std::vector<std::string> slst;
1808 
1809   const char* word = in_word.c_str();
1810 
1811   const char* q = strstr(word, "<query");
1812   if (!q)
1813     return slst;  // bad XML input
1814   const char* q2 = strchr(q, '>');
1815   if (!q2)
1816     return slst;  // bad XML input
1817   q2 = strstr(q2, "<word");
1818   if (!q2)
1819     return slst;  // bad XML input
1820   if (check_xml_par(q, "type=", "analyze")) {
1821     std::string cw = get_xml_par(strchr(q2, '>'));
1822     if (!cw.empty())
1823       slst = analyze(cw);
1824     if (slst.empty())
1825       return slst;
1826     // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1827     std::string r;
1828     r.append("<code>");
1829     for (size_t i = 0; i < slst.size(); ++i) {
1830       r.append("<a>");
1831 
1832       std::string entry(slst[i]);
1833       mystrrep(entry, "\t", " ");
1834       mystrrep(entry, "&", "&amp;");
1835       mystrrep(entry, "<", "&lt;");
1836       r.append(entry);
1837 
1838       r.append("</a>");
1839     }
1840     r.append("</code>");
1841     slst.clear();
1842     slst.push_back(r);
1843     return slst;
1844   } else if (check_xml_par(q, "type=", "stem")) {
1845     std::string cw = get_xml_par(strchr(q2, '>'));
1846     if (!cw.empty())
1847       return stem(cw);
1848   } else if (check_xml_par(q, "type=", "generate")) {
1849     std::string cw = get_xml_par(strchr(q2, '>'));
1850     if (cw.empty())
1851       return slst;
1852     const char* q3 = strstr(q2 + 1, "<word");
1853     if (q3) {
1854       std::string cw2 = get_xml_par(strchr(q3, '>'));
1855       if (!cw2.empty()) {
1856         return generate(cw, cw2);
1857       }
1858     } else {
1859       if ((q2 = strstr(q2 + 1, "<code")) != NULL) {
1860         std::vector<std::string> slst2 = get_xml_list(strchr(q2, '>'), "<a>");
1861         if (!slst2.empty()) {
1862           slst = generate(cw, slst2);
1863           uniqlist(slst);
1864           return slst;
1865         }
1866       }
1867     }
1868   } else if (check_xml_par(q, "type=", "add")) {
1869     std::string cw = get_xml_par(strchr(q2, '>'));
1870     if (cw.empty())
1871       return slst;
1872     const char* q3 = strstr(q2 + 1, "<word");
1873     if (q3) {
1874       std::string cw2 = get_xml_par(strchr(q3, '>'));
1875       if (!cw2.empty()) {
1876         add_with_affix(cw, cw2);
1877       } else {
1878         add(cw);
1879       }
1880     } else {
1881         add(cw);
1882     }
1883   }
1884   return slst;
1885 }
1886 
suffix_suggest(const std::string & root_word)1887 std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
1888   std::vector<std::string> slst;
1889   struct hentry* he = NULL;
1890   int len;
1891   std::string w2;
1892   const char* word;
1893   const char* ignoredchars = pAMgr->get_ignore();
1894   if (ignoredchars != NULL) {
1895     w2.assign(root_word);
1896     if (utf8) {
1897       const std::vector<w_char>& ignoredchars_utf16 =
1898           pAMgr->get_ignore_utf16();
1899       remove_ignored_chars_utf(w2, ignoredchars_utf16);
1900     } else {
1901       remove_ignored_chars(w2, ignoredchars);
1902     }
1903     word = w2.c_str();
1904   } else
1905     word = root_word.c_str();
1906 
1907   len = strlen(word);
1908 
1909   if (!len)
1910     return slst;
1911 
1912   for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
1913     he = m_HMgrs[i]->lookup(word);
1914   }
1915   if (he) {
1916     slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str());
1917   }
1918   return slst;
1919 }
1920 
1921 namespace {
munge_vector(char *** slst,const std::vector<std::string> & items)1922   int munge_vector(char*** slst, const std::vector<std::string>& items) {
1923     if (items.empty()) {
1924       *slst = NULL;
1925       return 0;
1926     } else {
1927       *slst = (char**)malloc(sizeof(char*) * items.size());
1928       if (!*slst)
1929         return 0;
1930       for (size_t i = 0; i < items.size(); ++i)
1931         (*slst)[i] = mystrdup(items[i].c_str());
1932     }
1933     return items.size();
1934   }
1935 }
1936 
spell(const char * word,int * info,char ** root)1937 int HunspellImpl::spell(const char* word, int* info, char** root) {
1938   std::string sroot;
1939   bool ret = spell(word, info, root ? &sroot : NULL);
1940   if (root) {
1941     if (sroot.empty()) {
1942       *root = NULL;
1943     } else {
1944       *root = mystrdup(sroot.c_str());
1945     }
1946   }
1947   return ret;
1948 }
1949 
suggest(char *** slst,const char * word)1950 int HunspellImpl::suggest(char*** slst, const char* word) {
1951   std::vector<std::string> suggests = suggest(word);
1952   return munge_vector(slst, suggests);
1953 }
1954 
suffix_suggest(char *** slst,const char * root_word)1955 int HunspellImpl::suffix_suggest(char*** slst, const char* root_word) {
1956   std::vector<std::string> stems = suffix_suggest(root_word);
1957   return munge_vector(slst, stems);
1958 }
1959 
free_list(char *** slst,int n)1960 void HunspellImpl::free_list(char*** slst, int n) {
1961   if (slst && *slst) {
1962     for (int i = 0; i < n; i++)
1963       free((*slst)[i]);
1964     free(*slst);
1965     *slst = NULL;
1966   }
1967 }
1968 
get_dic_encoding()1969 char* HunspellImpl::get_dic_encoding() {
1970   return &encoding[0];
1971 }
1972 
analyze(char *** slst,const char * word)1973 int HunspellImpl::analyze(char*** slst, const char* word) {
1974   std::vector<std::string> stems = analyze(word);
1975   return munge_vector(slst, stems);
1976 }
1977 
stem(char *** slst,const char * word)1978 int HunspellImpl::stem(char*** slst, const char* word) {
1979   std::vector<std::string> stems = stem(word);
1980   return munge_vector(slst, stems);
1981 }
1982 
stem(char *** slst,char ** desc,int n)1983 int HunspellImpl::stem(char*** slst, char** desc, int n) {
1984   std::vector<std::string> morph;
1985   for (int i = 0; i < n; ++i)
1986     morph.push_back(desc[i]);
1987 
1988   std::vector<std::string> stems = stem(morph);
1989   return munge_vector(slst, stems);
1990 }
1991 
generate(char *** slst,const char * word,const char * pattern)1992 int HunspellImpl::generate(char*** slst, const char* word, const char* pattern) {
1993   std::vector<std::string> stems = generate(word, pattern);
1994   return munge_vector(slst, stems);
1995 }
1996 
generate(char *** slst,const char * word,char ** pl,int pln)1997 int HunspellImpl::generate(char*** slst, const char* word, char** pl, int pln) {
1998   std::vector<std::string> morph;
1999   for (int i = 0; i < pln; ++i)
2000     morph.push_back(pl[i]);
2001 
2002   std::vector<std::string> stems = generate(word, morph);
2003   return munge_vector(slst, stems);
2004 }
2005 
get_wordchars() const2006 const char* HunspellImpl::get_wordchars() const {
2007   return get_wordchars_cpp().c_str();
2008 }
2009 
get_version() const2010 const char* HunspellImpl::get_version() const {
2011   return get_version_cpp().c_str();
2012 }
2013 
input_conv(const char * word,char * dest,size_t destsize)2014 int HunspellImpl::input_conv(const char* word, char* dest, size_t destsize) {
2015   std::string d;
2016   bool ret = input_conv(word, d);
2017   if (ret && d.size() < destsize) {
2018     strncpy(dest, d.c_str(), destsize);
2019     return 1;
2020   }
2021   return 0;
2022 }
2023 
Hunspell(const char * affpath,const char * dpath,const char * key)2024 Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
2025   : m_Impl(new HunspellImpl(affpath, dpath, key)) {
2026 }
2027 
~Hunspell()2028 Hunspell::~Hunspell() {
2029   delete m_Impl;
2030 }
2031 
2032 // load extra dictionaries
add_dic(const char * dpath,const char * key)2033 int Hunspell::add_dic(const char* dpath, const char* key) {
2034   return m_Impl->add_dic(dpath, key);
2035 }
2036 
spell(const std::string & word,int * info,std::string * root)2037 bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
2038   return m_Impl->spell(word, info, root);
2039 }
2040 
suggest(const std::string & word)2041 std::vector<std::string> Hunspell::suggest(const std::string& word) {
2042   return m_Impl->suggest(word);
2043 }
2044 
suffix_suggest(const std::string & root_word)2045 std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
2046   return m_Impl->suffix_suggest(root_word);
2047 }
2048 
get_dict_encoding() const2049 const std::string& Hunspell::get_dict_encoding() const {
2050   return m_Impl->get_dict_encoding();
2051 }
2052 
stem(const std::vector<std::string> & desc)2053 std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
2054   return m_Impl->stem(desc);
2055 }
2056 
stem(const std::string & word)2057 std::vector<std::string> Hunspell::stem(const std::string& word) {
2058   return m_Impl->stem(word);
2059 }
2060 
get_wordchars_cpp() const2061 const std::string& Hunspell::get_wordchars_cpp() const {
2062   return m_Impl->get_wordchars_cpp();
2063 }
2064 
get_wordchars_utf16() const2065 const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
2066   return m_Impl->get_wordchars_utf16();
2067 }
2068 
add(const std::string & word)2069 int Hunspell::add(const std::string& word) {
2070   return m_Impl->add(word);
2071 }
2072 
add_with_affix(const std::string & word,const std::string & example)2073 int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
2074   return m_Impl->add_with_affix(word, example);
2075 }
2076 
remove(const std::string & word)2077 int Hunspell::remove(const std::string& word) {
2078   return m_Impl->remove(word);
2079 }
2080 
get_version_cpp() const2081 const std::string& Hunspell::get_version_cpp() const {
2082   return m_Impl->get_version_cpp();
2083 }
2084 
get_csconv()2085 struct cs_info* Hunspell::get_csconv() {
2086   return m_Impl->get_csconv();
2087 }
2088 
analyze(const std::string & word)2089 std::vector<std::string> Hunspell::analyze(const std::string& word) {
2090   return m_Impl->analyze(word);
2091 }
2092 
generate(const std::string & word,const std::vector<std::string> & pl)2093 std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
2094   return m_Impl->generate(word, pl);
2095 }
2096 
generate(const std::string & word,const std::string & pattern)2097 std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
2098   return m_Impl->generate(word, pattern);
2099 }
2100 
get_langnum() const2101 int Hunspell::get_langnum() const {
2102   return m_Impl->get_langnum();
2103 }
2104 
input_conv(const std::string & word,std::string & dest)2105 bool Hunspell::input_conv(const std::string& word, std::string& dest) {
2106   return m_Impl->input_conv(word, dest);
2107 }
2108 
spell(const char * word,int * info,char ** root)2109 int Hunspell::spell(const char* word, int* info, char** root) {
2110   return m_Impl->spell(word, info, root);
2111 }
2112 
suggest(char *** slst,const char * word)2113 int Hunspell::suggest(char*** slst, const char* word) {
2114   return m_Impl->suggest(slst, word);
2115 }
2116 
suffix_suggest(char *** slst,const char * root_word)2117 int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
2118   return m_Impl->suffix_suggest(slst, root_word);
2119 }
2120 
free_list(char *** slst,int n)2121 void Hunspell::free_list(char*** slst, int n) {
2122   m_Impl->free_list(slst, n);
2123 }
2124 
get_dic_encoding()2125 char* Hunspell::get_dic_encoding() {
2126   return m_Impl->get_dic_encoding();
2127 }
2128 
analyze(char *** slst,const char * word)2129 int Hunspell::analyze(char*** slst, const char* word) {
2130   return m_Impl->analyze(slst, word);
2131 }
2132 
stem(char *** slst,const char * word)2133 int Hunspell::stem(char*** slst, const char* word) {
2134   return m_Impl->stem(slst, word);
2135 }
2136 
stem(char *** slst,char ** desc,int n)2137 int Hunspell::stem(char*** slst, char** desc, int n) {
2138   return m_Impl->stem(slst, desc, n);
2139 }
2140 
generate(char *** slst,const char * word,const char * pattern)2141 int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
2142   return m_Impl->generate(slst, word, pattern);
2143 }
2144 
generate(char *** slst,const char * word,char ** pl,int pln)2145 int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
2146   return m_Impl->generate(slst, word, pl, pln);
2147 }
2148 
get_wordchars() const2149 const char* Hunspell::get_wordchars() const {
2150   return m_Impl->get_wordchars();
2151 }
2152 
get_version() const2153 const char* Hunspell::get_version() const {
2154   return m_Impl->get_version();
2155 }
2156 
input_conv(const char * word,char * dest,size_t destsize)2157 int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
2158   return m_Impl->input_conv(word, dest, destsize);
2159 }
2160 
Hunspell_create(const char * affpath,const char * dpath)2161 Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
2162   return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath));
2163 }
2164 
Hunspell_create_key(const char * affpath,const char * dpath,const char * key)2165 Hunhandle* Hunspell_create_key(const char* affpath,
2166                                const char* dpath,
2167                                const char* key) {
2168   return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath, key));
2169 }
2170 
Hunspell_destroy(Hunhandle * pHunspell)2171 void Hunspell_destroy(Hunhandle* pHunspell) {
2172   delete reinterpret_cast<HunspellImpl*>(pHunspell);
2173 }
2174 
Hunspell_add_dic(Hunhandle * pHunspell,const char * dpath)2175 int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
2176   return reinterpret_cast<HunspellImpl*>(pHunspell)->add_dic(dpath);
2177 }
2178 
Hunspell_spell(Hunhandle * pHunspell,const char * word)2179 int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
2180   return reinterpret_cast<HunspellImpl*>(pHunspell)->spell(word);
2181 }
2182 
Hunspell_get_dic_encoding(Hunhandle * pHunspell)2183 char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
2184   return reinterpret_cast<HunspellImpl*>(pHunspell)->get_dic_encoding();
2185 }
2186 
Hunspell_suggest(Hunhandle * pHunspell,char *** slst,const char * word)2187 int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
2188   return reinterpret_cast<HunspellImpl*>(pHunspell)->suggest(slst, word);
2189 }
2190 
Hunspell_analyze(Hunhandle * pHunspell,char *** slst,const char * word)2191 int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
2192   return reinterpret_cast<HunspellImpl*>(pHunspell)->analyze(slst, word);
2193 }
2194 
Hunspell_stem(Hunhandle * pHunspell,char *** slst,const char * word)2195 int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
2196   return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, word);
2197 }
2198 
Hunspell_stem2(Hunhandle * pHunspell,char *** slst,char ** desc,int n)2199 int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
2200   return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, desc, n);
2201 }
2202 
Hunspell_generate(Hunhandle * pHunspell,char *** slst,const char * word,const char * pattern)2203 int Hunspell_generate(Hunhandle* pHunspell,
2204                       char*** slst,
2205                       const char* word,
2206                       const char* pattern)
2207 {
2208   return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, pattern);
2209 }
2210 
Hunspell_generate2(Hunhandle * pHunspell,char *** slst,const char * word,char ** desc,int n)2211 int Hunspell_generate2(Hunhandle* pHunspell,
2212                        char*** slst,
2213                        const char* word,
2214                        char** desc,
2215                        int n)
2216 {
2217   return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, desc, n);
2218 }
2219 
2220 /* functions for run-time modification of the dictionary */
2221 
2222 /* add word to the run-time dictionary */
2223 
Hunspell_add(Hunhandle * pHunspell,const char * word)2224 int Hunspell_add(Hunhandle* pHunspell, const char* word) {
2225   return reinterpret_cast<HunspellImpl*>(pHunspell)->add(word);
2226 }
2227 
2228 /* add word to the run-time dictionary with affix flags of
2229  * the example (a dictionary word): Hunspell will recognize
2230  * affixed forms of the new word, too.
2231  */
2232 
Hunspell_add_with_affix(Hunhandle * pHunspell,const char * word,const char * example)2233 int Hunspell_add_with_affix(Hunhandle* pHunspell,
2234                             const char* word,
2235                             const char* example) {
2236   return reinterpret_cast<HunspellImpl*>(pHunspell)->add_with_affix(word, example);
2237 }
2238 
2239 /* remove word from the run-time dictionary */
2240 
Hunspell_remove(Hunhandle * pHunspell,const char * word)2241 int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
2242   return reinterpret_cast<HunspellImpl*>(pHunspell)->remove(word);
2243 }
2244 
Hunspell_free_list(Hunhandle * pHunspell,char *** list,int n)2245 void Hunspell_free_list(Hunhandle* pHunspell, char*** list, int n) {
2246   reinterpret_cast<HunspellImpl*>(pHunspell)->free_list(list, n);
2247 }
2248