1 /* ***** BEGIN LICENSE BLOCK *****
2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3  *
4  * Copyright (C) 2002-2017 Németh László
5  *
6  * The contents of this file are subject to the Mozilla Public License Version
7  * 1.1 (the "License"); you may not use this file except in compliance with
8  * the License. You may obtain a copy of the License at
9  * http://www.mozilla.org/MPL/
10  *
11  * Software distributed under the License is distributed on an "AS IS" basis,
12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13  * for the specific language governing rights and limitations under the
14  * License.
15  *
16  * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17  *
18  * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19  * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20  * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21  * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22  * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23  *
24  * Alternatively, the contents of this file may be used under the terms of
25  * either the GNU General Public License Version 2 or later (the "GPL"), or
26  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27  * in which case the provisions of the GPL or the LGPL are applicable instead
28  * of those above. If you wish to allow use of your version of this file only
29  * under the terms of either the GPL or the LGPL, and not to allow others to
30  * use your version of this file under the terms of the MPL, indicate your
31  * decision by deleting the provisions above and replace them with the notice
32  * and other provisions required by the GPL or the LGPL. If you do not delete
33  * the provisions above, a recipient may use your version of this file under
34  * the terms of any one of the MPL, the GPL or the LGPL.
35  *
36  * ***** END LICENSE BLOCK ***** */
37 /*
38  * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39  * And Contributors.  All rights reserved.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  *
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  *
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  *
52  * 3. All modifications to the source code must be clearly marked as
53  *    such.  Binary redistributions based on modified source code
54  *    must be clearly marked as modified versions in the documentation
55  *    and/or other materials provided with the distribution.
56  *
57  * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
61  * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68  * SUCH DAMAGE.
69  */
70 
71 #include <stdlib.h>
72 #include <string.h>
73 #include <stdio.h>
74 #include <ctype.h>
75 #include <limits>
76 #include <sstream>
77 
78 #include "hashmgr.hxx"
79 #include "csutil.hxx"
80 #include "atypes.hxx"
81 #include "langnum.hxx"
82 
83 // build a hash table from a munched word list
84 
HashMgr(const char * tpath,const char * apath,const char * key)85 HashMgr::HashMgr(const char* tpath, const char* apath, const char* key)
86     : tablesize(0),
87       tableptr(NULL),
88       flag_mode(FLAG_CHAR),
89       complexprefixes(0),
90       utf8(0),
91       forbiddenword(FORBIDDENWORD)  // forbidden word signing flag
92       ,
93       numaliasf(0),
94       aliasf(NULL),
95       aliasflen(0),
96       numaliasm(0),
97       aliasm(NULL) {
98   langnum = 0;
99   csconv = 0;
100   load_config(apath, key);
101   int ec = load_tables(tpath, key);
102   if (ec) {
103     /* error condition - what should we do here */
104     HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n", ec);
105     free(tableptr);
106     //keep tablesize to 1 to fix possible division with zero
107     tablesize = 1;
108     tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*));
109     if (!tableptr) {
110       tablesize = 0;
111     }
112   }
113 }
114 
~HashMgr()115 HashMgr::~HashMgr() {
116   if (tableptr) {
117     // now pass through hash table freeing up everything
118     // go through column by column of the table
119     for (int i = 0; i < tablesize; i++) {
120       struct hentry* pt = tableptr[i];
121       struct hentry* nt = NULL;
122       while (pt) {
123         nt = pt->next;
124         if (pt->astr &&
125             (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen)))
126           free(pt->astr);
127         free(pt);
128         pt = nt;
129       }
130     }
131     free(tableptr);
132   }
133   tablesize = 0;
134 
135   if (aliasf) {
136     for (int j = 0; j < (numaliasf); j++)
137       free(aliasf[j]);
138     free(aliasf);
139     aliasf = NULL;
140     if (aliasflen) {
141       free(aliasflen);
142       aliasflen = NULL;
143     }
144   }
145   if (aliasm) {
146     for (int j = 0; j < (numaliasm); j++)
147       free(aliasm[j]);
148     free(aliasm);
149     aliasm = NULL;
150   }
151 
152 #ifndef OPENOFFICEORG
153 #ifndef MOZILLA_CLIENT
154   if (utf8)
155     free_utf_tbl();
156 #endif
157 #endif
158 
159 #ifdef MOZILLA_CLIENT
160   delete[] csconv;
161 #endif
162 }
163 
164 // lookup a root word in the hashtable
165 
lookup(const char * word) const166 struct hentry* HashMgr::lookup(const char* word) const {
167   struct hentry* dp;
168   if (tableptr) {
169     dp = tableptr[hash(word)];
170     if (!dp)
171       return NULL;
172     for (; dp != NULL; dp = dp->next) {
173       if (strcmp(word, dp->word) == 0)
174         return dp;
175     }
176   }
177   return NULL;
178 }
179 
180 // add a word to the hash table (private)
add_word(const std::string & in_word,int wcl,unsigned short * aff,int al,const std::string * in_desc,bool onlyupcase,int captype)181 int HashMgr::add_word(const std::string& in_word,
182                       int wcl,
183                       unsigned short* aff,
184                       int al,
185                       const std::string* in_desc,
186                       bool onlyupcase,
187                       int captype) {
188   const std::string* word = &in_word;
189   const std::string* desc = in_desc;
190 
191   std::string *word_copy = NULL;
192   std::string *desc_copy = NULL;
193   if ((!ignorechars.empty() && !has_no_ignored_chars(in_word, ignorechars)) || complexprefixes) {
194     word_copy = new std::string(in_word);
195 
196     if (!ignorechars.empty()) {
197       if (utf8) {
198         wcl = remove_ignored_chars_utf(*word_copy, ignorechars_utf16);
199       } else {
200         remove_ignored_chars(*word_copy, ignorechars);
201       }
202     }
203 
204     if (complexprefixes) {
205       if (utf8)
206         wcl = reverseword_utf(*word_copy);
207       else
208         reverseword(*word_copy);
209 
210       if (in_desc && !aliasm) {
211         desc_copy = new std::string(*in_desc);
212 
213         if (complexprefixes) {
214           if (utf8)
215             reverseword_utf(*desc_copy);
216           else
217             reverseword(*desc_copy);
218         }
219         desc = desc_copy;
220       }
221     }
222 
223     word = word_copy;
224   }
225 
226   bool upcasehomonym = false;
227   int descl = desc ? (aliasm ? sizeof(char*) : desc->size() + 1) : 0;
228   // variable-length hash record with word and optional fields
229   struct hentry* hp =
230       (struct hentry*)malloc(sizeof(struct hentry) + word->size() + descl);
231   if (!hp) {
232     delete desc_copy;
233     delete word_copy;
234     return 1;
235   }
236 
237   char* hpw = hp->word;
238   strcpy(hpw, word->c_str());
239 
240   int i = hash(hpw);
241 
242   hp->blen = (unsigned char)word->size();
243   hp->clen = (unsigned char)wcl;
244   hp->alen = (short)al;
245   hp->astr = aff;
246   hp->next = NULL;
247   hp->next_homonym = NULL;
248   hp->var = (captype == INITCAP) ? H_OPT_INITCAP : 0;
249 
250   // store the description string or its pointer
251   if (desc) {
252     hp->var += H_OPT;
253     if (aliasm) {
254       hp->var += H_OPT_ALIASM;
255       store_pointer(hpw + word->size() + 1, get_aliasm(atoi(desc->c_str())));
256     } else {
257       strcpy(hpw + word->size() + 1, desc->c_str());
258     }
259     if (strstr(HENTRY_DATA(hp), MORPH_PHON)) {
260       hp->var += H_OPT_PHON;
261       // store ph: fields (pronounciation, misspellings, old orthography etc.)
262       // of a morphological description in reptable to use in REP replacements.
263       if (reptable.capacity() < (unsigned int)(tablesize/MORPH_PHON_RATIO))
264           reptable.reserve(tablesize/MORPH_PHON_RATIO);
265       std::string fields = HENTRY_DATA(hp);
266       std::string::const_iterator iter = fields.begin();
267       std::string::const_iterator start_piece = mystrsep(fields, iter);
268       while (start_piece != fields.end()) {
269         if (std::string(start_piece, iter).find(MORPH_PHON) == 0) {
270           std::string ph = std::string(start_piece, iter).substr(sizeof MORPH_PHON - 1);
271           if (ph.size() > 0) {
272             std::vector<w_char> w;
273             size_t strippatt;
274             std::string wordpart;
275             // dictionary based REP replacement, separated by "->"
276             // for example "pretty ph:prity ph:priti->pretti" to handle
277             // both prity -> pretty and pritier -> prettiest suggestions.
278             if (((strippatt = ph.find("->")) != std::string::npos) &&
279                     (strippatt > 0) && (strippatt < ph.size() - 2)) {
280                 wordpart = ph.substr(strippatt + 2);
281                 ph.erase(ph.begin() + strippatt, ph.end());
282             } else
283                 wordpart = in_word;
284             // when the ph: field ends with the character *,
285             // strip last character of the pattern and the replacement
286             // to match in REP suggestions also at character changes,
287             // for example, "pretty ph:prity*" results "prit->prett"
288             // REP replacement instead of "prity->pretty", to get
289             // prity->pretty and pritiest->prettiest suggestions.
290             if (ph.at(ph.size()-1) == '*') {
291               strippatt = 1;
292               size_t stripword = 0;
293               if (utf8) {
294                 while ((strippatt < ph.size()) &&
295                   ((ph.at(ph.size()-strippatt-1) & 0xc0) == 0x80))
296                      ++strippatt;
297                 while ((stripword < wordpart.size()) &&
298                   ((wordpart.at(wordpart.size()-stripword-1) & 0xc0) == 0x80))
299                      ++stripword;
300               }
301               ++strippatt;
302               ++stripword;
303               if ((ph.size() > strippatt) && (wordpart.size() > stripword)) {
304                 ph.erase(ph.size()-strippatt, strippatt);
305                 wordpart.erase(in_word.size()-stripword, stripword);
306               }
307             }
308             // capitalize lowercase pattern for capitalized words to support
309             // good suggestions also for capitalized misspellings, eg.
310             // Wednesday ph:wendsay
311             // results wendsay -> Wednesday and Wendsay -> Wednesday, too.
312             if (captype==INITCAP) {
313               std::string ph_capitalized;
314               if (utf8) {
315                 u8_u16(w, ph);
316                 if (get_captype_utf8(w, langnum) == NOCAP) {
317                   mkinitcap_utf(w, langnum);
318                   u16_u8(ph_capitalized, w);
319                 }
320               } else if (get_captype(ph, csconv) == NOCAP)
321                   mkinitcap(ph_capitalized, csconv);
322 
323               if (ph_capitalized.size() > 0) {
324                 // add also lowercase word in the case of German or
325                 // Hungarian to support lowercase suggestions lowercased by
326                 // compound word generation or derivational suffixes
327                 // (for example by adjectival suffix "-i" of geographical
328                 // names in Hungarian:
329                 // Massachusetts ph:messzecsuzec
330                 // messzecsuzeci -> massachusettsi (adjective)
331                 // For lowercasing by conditional PFX rules, see
332                 // tests/germancompounding test example or the
333                 // Hungarian dictionary.)
334                 if (langnum == LANG_de || langnum == LANG_hu) {
335                   std::string wordpart_lower(wordpart);
336                   if (utf8) {
337                     u8_u16(w, wordpart_lower);
338                     mkallsmall_utf(w, langnum);
339                     u16_u8(wordpart_lower, w);
340                   } else {
341                     mkallsmall(wordpart_lower, csconv);
342                   }
343                   reptable.push_back(replentry());
344                   reptable.back().pattern.assign(ph);
345                   reptable.back().outstrings[0].assign(wordpart_lower);
346                 }
347                 reptable.push_back(replentry());
348                 reptable.back().pattern.assign(ph_capitalized);
349                 reptable.back().outstrings[0].assign(wordpart);
350               }
351             }
352             reptable.push_back(replentry());
353             reptable.back().pattern.assign(ph);
354             reptable.back().outstrings[0].assign(wordpart);
355           }
356         }
357         start_piece = mystrsep(fields, iter);
358       }
359     }
360   }
361 
362   struct hentry* dp = tableptr[i];
363   if (!dp) {
364     tableptr[i] = hp;
365     delete desc_copy;
366     delete word_copy;
367     return 0;
368   }
369   while (dp->next != NULL) {
370     if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
371       // remove hidden onlyupcase homonym
372       if (!onlyupcase) {
373         if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
374           free(dp->astr);
375           dp->astr = hp->astr;
376           dp->alen = hp->alen;
377           free(hp);
378           delete desc_copy;
379           delete word_copy;
380           return 0;
381         } else {
382           dp->next_homonym = hp;
383         }
384       } else {
385         upcasehomonym = true;
386       }
387     }
388     dp = dp->next;
389   }
390   if (strcmp(hp->word, dp->word) == 0) {
391     // remove hidden onlyupcase homonym
392     if (!onlyupcase) {
393       if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
394         free(dp->astr);
395         dp->astr = hp->astr;
396         dp->alen = hp->alen;
397         free(hp);
398         delete desc_copy;
399         delete word_copy;
400         return 0;
401       } else {
402         dp->next_homonym = hp;
403       }
404     } else {
405       upcasehomonym = true;
406     }
407   }
408   if (!upcasehomonym) {
409     dp->next = hp;
410   } else {
411     // remove hidden onlyupcase homonym
412     if (hp->astr)
413       free(hp->astr);
414     free(hp);
415   }
416 
417   delete desc_copy;
418   delete word_copy;
419   return 0;
420 }
421 
add_hidden_capitalized_word(const std::string & word,int wcl,unsigned short * flags,int flagslen,const std::string * dp,int captype)422 int HashMgr::add_hidden_capitalized_word(const std::string& word,
423                                          int wcl,
424                                          unsigned short* flags,
425                                          int flagslen,
426                                          const std::string* dp,
427                                          int captype) {
428   if (flags == NULL)
429     flagslen = 0;
430 
431   // add inner capitalized forms to handle the following allcap forms:
432   // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
433   // Allcaps with suffixes: CIA's -> CIA'S
434   if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
435        ((captype == ALLCAP) && (flagslen != 0))) &&
436       !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) {
437     unsigned short* flags2 =
438         (unsigned short*)malloc(sizeof(unsigned short) * (flagslen + 1));
439     if (!flags2)
440       return 1;
441     if (flagslen)
442       memcpy(flags2, flags, flagslen * sizeof(unsigned short));
443     flags2[flagslen] = ONLYUPCASEFLAG;
444     if (utf8) {
445       std::string st;
446       std::vector<w_char> w;
447       u8_u16(w, word);
448       mkallsmall_utf(w, langnum);
449       mkinitcap_utf(w, langnum);
450       u16_u8(st, w);
451       return add_word(st, wcl, flags2, flagslen + 1, dp, true, INITCAP);
452     } else {
453       std::string new_word(word);
454       mkallsmall(new_word, csconv);
455       mkinitcap(new_word, csconv);
456       int ret = add_word(new_word, wcl, flags2, flagslen + 1, dp, true, INITCAP);
457       return ret;
458     }
459   }
460   return 0;
461 }
462 
463 // detect captype and modify word length for UTF-8 encoding
get_clen_and_captype(const std::string & word,int * captype,std::vector<w_char> & workbuf)464 int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) {
465   int len;
466   if (utf8) {
467     len = u8_u16(workbuf, word);
468     *captype = get_captype_utf8(workbuf, langnum);
469   } else {
470     len = word.size();
471     *captype = get_captype(word, csconv);
472   }
473   return len;
474 }
475 
get_clen_and_captype(const std::string & word,int * captype)476 int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
477   std::vector<w_char> workbuf;
478   return get_clen_and_captype(word, captype, workbuf);
479 }
480 
481 // remove word (personal dictionary function for standalone applications)
remove(const std::string & word)482 int HashMgr::remove(const std::string& word) {
483   struct hentry* dp = lookup(word.c_str());
484   while (dp) {
485     if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
486       unsigned short* flags =
487           (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen + 1));
488       if (!flags)
489         return 1;
490       for (int i = 0; i < dp->alen; i++)
491         flags[i] = dp->astr[i];
492       flags[dp->alen] = forbiddenword;
493       free(dp->astr);
494       dp->astr = flags;
495       dp->alen++;
496       std::sort(flags, flags + dp->alen);
497     }
498     dp = dp->next_homonym;
499   }
500   return 0;
501 }
502 
503 /* remove forbidden flag to add a personal word to the hash */
remove_forbidden_flag(const std::string & word)504 int HashMgr::remove_forbidden_flag(const std::string& word) {
505   struct hentry* dp = lookup(word.c_str());
506   if (!dp)
507     return 1;
508   while (dp) {
509     if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen))
510       dp->alen = 0;  // XXX forbidden words of personal dic.
511     dp = dp->next_homonym;
512   }
513   return 0;
514 }
515 
516 // add a custom dic. word to the hash table (public)
add(const std::string & word)517 int HashMgr::add(const std::string& word) {
518   if (remove_forbidden_flag(word)) {
519     int captype;
520     int al = 0;
521     unsigned short* flags = NULL;
522     int wcl = get_clen_and_captype(word, &captype);
523     add_word(word, wcl, flags, al, NULL, false, captype);
524     return add_hidden_capitalized_word(word, wcl, flags, al, NULL,
525                                        captype);
526   }
527   return 0;
528 }
529 
add_with_affix(const std::string & word,const std::string & example)530 int HashMgr::add_with_affix(const std::string& word, const std::string& example) {
531   // detect captype and modify word length for UTF-8 encoding
532   struct hentry* dp = lookup(example.c_str());
533   remove_forbidden_flag(word);
534   if (dp && dp->astr) {
535     int captype;
536     int wcl = get_clen_and_captype(word, &captype);
537     if (aliasf) {
538       add_word(word, wcl, dp->astr, dp->alen, NULL, false, captype);
539     } else {
540       unsigned short* flags =
541           (unsigned short*)malloc(dp->alen * sizeof(unsigned short));
542       if (flags) {
543         memcpy((void*)flags, (void*)dp->astr,
544                dp->alen * sizeof(unsigned short));
545         add_word(word, wcl, flags, dp->alen, NULL, false, captype);
546       } else
547         return 1;
548     }
549     return add_hidden_capitalized_word(word, wcl, dp->astr,
550                                        dp->alen, NULL, captype);
551   }
552   return 1;
553 }
554 
555 // walk the hash table entry by entry - null at end
556 // initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
walk_hashtable(int & col,struct hentry * hp) const557 struct hentry* HashMgr::walk_hashtable(int& col, struct hentry* hp) const {
558   if (hp && hp->next != NULL)
559     return hp->next;
560   for (col++; col < tablesize; col++) {
561     if (tableptr[col])
562       return tableptr[col];
563   }
564   // null at end and reset to start
565   col = -1;
566   return NULL;
567 }
568 
569 // load a munched word list and build a hash table on the fly
load_tables(const char * tpath,const char * key)570 int HashMgr::load_tables(const char* tpath, const char* key) {
571   // open dictionary file
572   FileMgr* dict = new FileMgr(tpath, key);
573   if (dict == NULL)
574     return 1;
575 
576   // first read the first line of file to get hash table size */
577   std::string ts;
578   if (!dict->getline(ts)) {
579     HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath);
580     delete dict;
581     return 2;
582   }
583   mychomp(ts);
584 
585   /* remove byte order mark */
586   if (ts.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {
587     ts.erase(0, 3);
588   }
589 
590   tablesize = atoi(ts.c_str());
591 
592   int nExtra = 5 + USERWORD;
593 
594   if (tablesize <= 0 ||
595       (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) /
596                         int(sizeof(struct hentry*)))) {
597     HUNSPELL_WARNING(
598         stderr, "error: line 1: missing or bad word count in the dic file\n");
599     delete dict;
600     return 4;
601   }
602   tablesize += nExtra;
603   if ((tablesize % 2) == 0)
604     tablesize++;
605 
606   // allocate the hash table
607   tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*));
608   if (!tableptr) {
609     delete dict;
610     return 3;
611   }
612 
613   // loop through all words on much list and add to hash
614   // table and create word and affix strings
615 
616   std::vector<w_char> workbuf;
617 
618   while (dict->getline(ts)) {
619     mychomp(ts);
620     // split each line into word and morphological description
621     size_t dp_pos = 0;
622     while ((dp_pos = ts.find(':', dp_pos)) != std::string::npos) {
623       if ((dp_pos > 3) && (ts[dp_pos - 3] == ' ' || ts[dp_pos - 3] == '\t')) {
624         for (dp_pos -= 3; dp_pos > 0 && (ts[dp_pos-1] == ' ' || ts[dp_pos-1] == '\t'); --dp_pos)
625           ;
626         if (dp_pos == 0) {  // missing word
627           dp_pos = std::string::npos;
628         } else {
629           ++dp_pos;
630         }
631         break;
632       }
633       ++dp_pos;
634     }
635 
636     // tabulator is the old morphological field separator
637     size_t dp2_pos = ts.find('\t');
638     if (dp2_pos != std::string::npos && (dp_pos == std::string::npos || dp2_pos < dp_pos)) {
639       dp_pos = dp2_pos + 1;
640     }
641 
642     std::string dp;
643     if (dp_pos != std::string::npos) {
644       dp.assign(ts.substr(dp_pos));
645       ts.resize(dp_pos - 1);
646     }
647 
648     // split each line into word and affix char strings
649     // "\/" signs slash in words (not affix separator)
650     // "/" at beginning of the line is word character (not affix separator)
651     size_t ap_pos = ts.find('/');
652     while (ap_pos != std::string::npos) {
653       if (ap_pos == 0) {
654         ++ap_pos;
655         continue;
656       } else if (ts[ap_pos - 1] != '\\')
657         break;
658       // replace "\/" with "/"
659       ts.erase(ap_pos - 1, 1);
660       ap_pos = ts.find('/', ap_pos);
661     }
662 
663     unsigned short* flags;
664     int al;
665     if (ap_pos != std::string::npos && ap_pos != ts.size()) {
666       std::string ap(ts.substr(ap_pos + 1));
667       ts.resize(ap_pos);
668       if (aliasf) {
669         int index = atoi(ap.c_str());
670         al = get_aliasf(index, &flags, dict);
671         if (!al) {
672           HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n",
673                            dict->getlinenum());
674         }
675       } else {
676         al = decode_flags(&flags, ap.c_str(), dict);
677         if (al == -1) {
678           HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
679           delete dict;
680           return 6;
681         }
682         std::sort(flags, flags + al);
683       }
684     } else {
685       al = 0;
686       flags = NULL;
687     }
688 
689     int captype;
690     int wcl = get_clen_and_captype(ts, &captype, workbuf);
691     const std::string *dp_str = dp.empty() ? NULL : &dp;
692     // add the word and its index plus its capitalized form optionally
693     if (add_word(ts, wcl, flags, al, dp_str, false, captype) ||
694         add_hidden_capitalized_word(ts, wcl, flags, al, dp_str, captype)) {
695       delete dict;
696       return 5;
697     }
698   }
699 
700   delete dict;
701   return 0;
702 }
703 
704 // the hash function is a simple load and rotate
705 // algorithm borrowed
hash(const char * word) const706 int HashMgr::hash(const char* word) const {
707   unsigned long hv = 0;
708   for (int i = 0; i < 4 && *word != 0; i++)
709     hv = (hv << 8) | (*word++);
710   while (*word != 0) {
711     ROTATE(hv, ROTATE_LEN);
712     hv ^= (*word++);
713   }
714   return (unsigned long)hv % tablesize;
715 }
716 
decode_flags(unsigned short ** result,const std::string & flags,FileMgr * af) const717 int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const {
718   int len;
719   if (flags.empty()) {
720     *result = NULL;
721     return 0;
722   }
723   switch (flag_mode) {
724     case FLAG_LONG: {  // two-character flags (1x2yZz -> 1x 2y Zz)
725       len = flags.size();
726       if (len % 2 == 1)
727         HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",
728                          af->getlinenum());
729       len /= 2;
730       *result = (unsigned short*)malloc(len * sizeof(unsigned short));
731       if (!*result)
732         return -1;
733       for (int i = 0; i < len; i++) {
734         (*result)[i] = ((unsigned short)((unsigned char)flags[i * 2]) << 8) +
735                        (unsigned char)flags[i * 2 + 1];
736       }
737       break;
738     }
739     case FLAG_NUM: {  // decimal numbers separated by comma (4521,23,233 -> 4521
740                       // 23 233)
741       len = 1;
742       unsigned short* dest;
743       for (size_t i = 0; i < flags.size(); ++i) {
744         if (flags[i] == ',')
745           len++;
746       }
747       *result = (unsigned short*)malloc(len * sizeof(unsigned short));
748       if (!*result)
749         return -1;
750       dest = *result;
751       const char* src = flags.c_str();
752       for (const char* p = src; *p; p++) {
753         if (*p == ',') {
754           int i = atoi(src);
755           if (i >= DEFAULTFLAGS)
756             HUNSPELL_WARNING(
757                 stderr, "error: line %d: flag id %d is too large (max: %d)\n",
758                 af->getlinenum(), i, DEFAULTFLAGS - 1);
759           *dest = (unsigned short)i;
760           if (*dest == 0)
761             HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
762                              af->getlinenum());
763           src = p + 1;
764           dest++;
765         }
766       }
767       int i = atoi(src);
768       if (i >= DEFAULTFLAGS)
769         HUNSPELL_WARNING(stderr,
770                          "error: line %d: flag id %d is too large (max: %d)\n",
771                          af->getlinenum(), i, DEFAULTFLAGS - 1);
772       *dest = (unsigned short)i;
773       if (*dest == 0)
774         HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
775                          af->getlinenum());
776       break;
777     }
778     case FLAG_UNI: {  // UTF-8 characters
779       std::vector<w_char> w;
780       u8_u16(w, flags);
781       len = w.size();
782       *result = (unsigned short*)malloc(len * sizeof(unsigned short));
783       if (!*result)
784         return -1;
785       memcpy(*result, &w[0], len * sizeof(short));
786       break;
787     }
788     default: {  // Ispell's one-character flags (erfg -> e r f g)
789       unsigned short* dest;
790       len = flags.size();
791       *result = (unsigned short*)malloc(len * sizeof(unsigned short));
792       if (!*result)
793         return -1;
794       dest = *result;
795       for (size_t i = 0; i < flags.size(); ++i) {
796         *dest = (unsigned char)flags[i];
797         dest++;
798       }
799     }
800   }
801   return len;
802 }
803 
decode_flags(std::vector<unsigned short> & result,const std::string & flags,FileMgr * af) const804 bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const {
805   if (flags.empty()) {
806     return false;
807   }
808   switch (flag_mode) {
809     case FLAG_LONG: {  // two-character flags (1x2yZz -> 1x 2y Zz)
810       size_t len = flags.size();
811       if (len % 2 == 1)
812         HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",
813                          af->getlinenum());
814       len /= 2;
815       result.reserve(result.size() + len);
816       for (size_t i = 0; i < len; ++i) {
817         result.push_back(((unsigned short)((unsigned char)flags[i * 2]) << 8) +
818                          (unsigned char)flags[i * 2 + 1]);
819       }
820       break;
821     }
822     case FLAG_NUM: {  // decimal numbers separated by comma (4521,23,233 -> 4521
823                       // 23 233)
824       const char* src = flags.c_str();
825       for (const char* p = src; *p; p++) {
826         if (*p == ',') {
827           int i = atoi(src);
828           if (i >= DEFAULTFLAGS)
829             HUNSPELL_WARNING(
830                 stderr, "error: line %d: flag id %d is too large (max: %d)\n",
831                 af->getlinenum(), i, DEFAULTFLAGS - 1);
832           result.push_back((unsigned short)i);
833           if (result.back() == 0)
834             HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
835                              af->getlinenum());
836           src = p + 1;
837         }
838       }
839       int i = atoi(src);
840       if (i >= DEFAULTFLAGS)
841         HUNSPELL_WARNING(stderr,
842                          "error: line %d: flag id %d is too large (max: %d)\n",
843                          af->getlinenum(), i, DEFAULTFLAGS - 1);
844       result.push_back((unsigned short)i);
845       if (result.back() == 0)
846         HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
847                          af->getlinenum());
848       break;
849     }
850     case FLAG_UNI: {  // UTF-8 characters
851       std::vector<w_char> w;
852       u8_u16(w, flags);
853       size_t len = w.size();
854       size_t origsize = result.size();
855       result.resize(origsize + len);
856       memcpy(&result[origsize], &w[0], len * sizeof(short));
857       break;
858     }
859     default: {  // Ispell's one-character flags (erfg -> e r f g)
860       result.reserve(flags.size());
861       for (size_t i = 0; i < flags.size(); ++i) {
862         result.push_back((unsigned char)flags[i]);
863       }
864     }
865   }
866   return true;
867 }
868 
decode_flag(const char * f) const869 unsigned short HashMgr::decode_flag(const char* f) const {
870   unsigned short s = 0;
871   int i;
872   switch (flag_mode) {
873     case FLAG_LONG:
874       s = ((unsigned short)((unsigned char)f[0]) << 8) + (unsigned char)f[1];
875       break;
876     case FLAG_NUM:
877       i = atoi(f);
878       if (i >= DEFAULTFLAGS)
879         HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n",
880                          i, DEFAULTFLAGS - 1);
881       s = (unsigned short)i;
882       break;
883     case FLAG_UNI: {
884       std::vector<w_char> w;
885       u8_u16(w, f);
886       if (!w.empty())
887           memcpy(&s, &w[0], 1 * sizeof(short));
888       break;
889     }
890     default:
891       s = *(unsigned char*)f;
892   }
893   if (s == 0)
894     HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
895   return s;
896 }
897 
encode_flag(unsigned short f) const898 char* HashMgr::encode_flag(unsigned short f) const {
899   if (f == 0)
900     return mystrdup("(NULL)");
901   std::string ch;
902   if (flag_mode == FLAG_LONG) {
903     ch.push_back((unsigned char)(f >> 8));
904     ch.push_back((unsigned char)(f - ((f >> 8) << 8)));
905   } else if (flag_mode == FLAG_NUM) {
906     std::ostringstream stream;
907     stream << f;
908     ch = stream.str();
909   } else if (flag_mode == FLAG_UNI) {
910     const w_char* w_c = (const w_char*)&f;
911     std::vector<w_char> w(w_c, w_c + 1);
912     u16_u8(ch, w);
913   } else {
914     ch.push_back((unsigned char)(f));
915   }
916   return mystrdup(ch.c_str());
917 }
918 
919 // read in aff file and set flag mode
load_config(const char * affpath,const char * key)920 int HashMgr::load_config(const char* affpath, const char* key) {
921   int firstline = 1;
922 
923   // open the affix file
924   FileMgr* afflst = new FileMgr(affpath, key);
925   if (!afflst) {
926     HUNSPELL_WARNING(
927         stderr, "Error - could not open affix description file %s\n", affpath);
928     return 1;
929   }
930 
931   // read in each line ignoring any that do not
932   // start with a known line type indicator
933 
934   std::string line;
935   while (afflst->getline(line)) {
936     mychomp(line);
937 
938     /* remove byte order mark */
939     if (firstline) {
940       firstline = 0;
941       if (line.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {
942         line.erase(0, 3);
943       }
944     }
945 
946     /* parse in the try string */
947     if ((line.compare(0, 4, "FLAG", 4) == 0) && line.size() > 4 && isspace(line[4])) {
948       if (flag_mode != FLAG_CHAR) {
949         HUNSPELL_WARNING(stderr,
950                          "error: line %d: multiple definitions of the FLAG "
951                          "affix file parameter\n",
952                          afflst->getlinenum());
953       }
954       if (line.find("long") != std::string::npos)
955         flag_mode = FLAG_LONG;
956       if (line.find("num") != std::string::npos)
957         flag_mode = FLAG_NUM;
958       if (line.find("UTF-8") != std::string::npos)
959         flag_mode = FLAG_UNI;
960       if (flag_mode == FLAG_CHAR) {
961         HUNSPELL_WARNING(
962             stderr,
963             "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n",
964             afflst->getlinenum());
965       }
966     }
967 
968     if (line.compare(0, 13, "FORBIDDENWORD", 13) == 0) {
969       std::string st;
970       if (!parse_string(line, st, afflst->getlinenum())) {
971         delete afflst;
972         return 1;
973       }
974       forbiddenword = decode_flag(st.c_str());
975     }
976 
977     if (line.compare(0, 3, "SET", 3) == 0) {
978       if (!parse_string(line, enc, afflst->getlinenum())) {
979         delete afflst;
980         return 1;
981       }
982       if (enc == "UTF-8") {
983         utf8 = 1;
984 #ifndef OPENOFFICEORG
985 #ifndef MOZILLA_CLIENT
986         initialize_utf_tbl();
987 #endif
988 #endif
989       } else
990         csconv = get_current_cs(enc);
991     }
992 
993     if (line.compare(0, 4, "LANG", 4) == 0) {
994       if (!parse_string(line, lang, afflst->getlinenum())) {
995         delete afflst;
996         return 1;
997       }
998       langnum = get_lang_num(lang);
999     }
1000 
1001     /* parse in the ignored characters (for example, Arabic optional diacritics
1002      * characters */
1003     if (line.compare(0, 6, "IGNORE", 6) == 0) {
1004       if (!parse_array(line, ignorechars, ignorechars_utf16,
1005                        utf8, afflst->getlinenum())) {
1006         delete afflst;
1007         return 1;
1008       }
1009     }
1010 
1011     if ((line.compare(0, 2, "AF", 2) == 0) && line.size() > 2 && isspace(line[2])) {
1012       if (!parse_aliasf(line, afflst)) {
1013         delete afflst;
1014         return 1;
1015       }
1016     }
1017 
1018     if ((line.compare(0, 2, "AM", 2) == 0) && line.size() > 2 && isspace(line[2])) {
1019       if (!parse_aliasm(line, afflst)) {
1020         delete afflst;
1021         return 1;
1022       }
1023     }
1024 
1025     if (line.compare(0, 15, "COMPLEXPREFIXES", 15) == 0)
1026       complexprefixes = 1;
1027 
1028     /* parse in the typical fault correcting table */
1029     if (line.compare(0, 3, "REP", 3) == 0) {
1030       if (!parse_reptable(line, afflst)) {
1031         delete afflst;
1032         return 1;
1033       }
1034     }
1035 
1036     // don't check the full affix file, yet
1037     if (((line.compare(0, 3, "SFX", 3) == 0) ||
1038          (line.compare(0, 3, "PFX", 3) == 0)) &&
1039             line.size() > 3 && isspace(line[3]) &&
1040             !reptable.empty()) // (REP table is in the end of Afrikaans aff file)
1041       break;
1042   }
1043 
1044   if (csconv == NULL)
1045     csconv = get_current_cs(SPELL_ENCODING);
1046   delete afflst;
1047   return 0;
1048 }
1049 
1050 /* parse in the ALIAS table */
parse_aliasf(const std::string & line,FileMgr * af)1051 bool HashMgr::parse_aliasf(const std::string& line, FileMgr* af) {
1052   if (numaliasf != 0) {
1053     HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
1054                      af->getlinenum());
1055     return false;
1056   }
1057   int i = 0;
1058   int np = 0;
1059   std::string::const_iterator iter = line.begin();
1060   std::string::const_iterator start_piece = mystrsep(line, iter);
1061   while (start_piece != line.end()) {
1062     switch (i) {
1063       case 0: {
1064         np++;
1065         break;
1066       }
1067       case 1: {
1068         numaliasf = atoi(std::string(start_piece, iter).c_str());
1069         if (numaliasf < 1) {
1070           numaliasf = 0;
1071           aliasf = NULL;
1072           aliasflen = NULL;
1073           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
1074                            af->getlinenum());
1075           return false;
1076         }
1077         aliasf =
1078             (unsigned short**)malloc(numaliasf * sizeof(unsigned short*));
1079         aliasflen =
1080             (unsigned short*)malloc(numaliasf * sizeof(unsigned short));
1081         if (!aliasf || !aliasflen) {
1082           numaliasf = 0;
1083           if (aliasf)
1084             free(aliasf);
1085           if (aliasflen)
1086             free(aliasflen);
1087           aliasf = NULL;
1088           aliasflen = NULL;
1089           return false;
1090         }
1091         np++;
1092         break;
1093       }
1094       default:
1095         break;
1096     }
1097     ++i;
1098     start_piece = mystrsep(line, iter);
1099   }
1100   if (np != 2) {
1101     numaliasf = 0;
1102     free(aliasf);
1103     free(aliasflen);
1104     aliasf = NULL;
1105     aliasflen = NULL;
1106     HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
1107                      af->getlinenum());
1108     return false;
1109   }
1110 
1111   /* now parse the numaliasf lines to read in the remainder of the table */
1112   for (int j = 0; j < numaliasf; j++) {
1113     std::string nl;
1114     if (!af->getline(nl))
1115       return false;
1116     mychomp(nl);
1117     i = 0;
1118     aliasf[j] = NULL;
1119     aliasflen[j] = 0;
1120     iter = nl.begin();
1121     start_piece = mystrsep(nl, iter);
1122     while (start_piece != nl.end()) {
1123       switch (i) {
1124         case 0: {
1125           if (nl.compare(start_piece - nl.begin(), 2, "AF", 2) != 0) {
1126             numaliasf = 0;
1127             free(aliasf);
1128             free(aliasflen);
1129             aliasf = NULL;
1130             aliasflen = NULL;
1131             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
1132                              af->getlinenum());
1133             return false;
1134           }
1135           break;
1136         }
1137         case 1: {
1138           std::string piece(start_piece, iter);
1139           aliasflen[j] =
1140               (unsigned short)decode_flags(&(aliasf[j]), piece, af);
1141           std::sort(aliasf[j], aliasf[j] + aliasflen[j]);
1142           break;
1143         }
1144         default:
1145           break;
1146       }
1147       ++i;
1148       start_piece = mystrsep(nl, iter);
1149     }
1150     if (!aliasf[j]) {
1151       free(aliasf);
1152       free(aliasflen);
1153       aliasf = NULL;
1154       aliasflen = NULL;
1155       numaliasf = 0;
1156       HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
1157                        af->getlinenum());
1158       return false;
1159     }
1160   }
1161   return true;
1162 }
1163 
is_aliasf() const1164 int HashMgr::is_aliasf() const {
1165   return (aliasf != NULL);
1166 }
1167 
get_aliasf(int index,unsigned short ** fvec,FileMgr * af) const1168 int HashMgr::get_aliasf(int index, unsigned short** fvec, FileMgr* af) const {
1169   if ((index > 0) && (index <= numaliasf)) {
1170     *fvec = aliasf[index - 1];
1171     return aliasflen[index - 1];
1172   }
1173   HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n",
1174                    af->getlinenum(), index);
1175   *fvec = NULL;
1176   return 0;
1177 }
1178 
1179 /* parse morph alias definitions */
parse_aliasm(const std::string & line,FileMgr * af)1180 bool HashMgr::parse_aliasm(const std::string& line, FileMgr* af) {
1181   if (numaliasm != 0) {
1182     HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
1183                      af->getlinenum());
1184     return false;
1185   }
1186   int i = 0;
1187   int np = 0;
1188   std::string::const_iterator iter = line.begin();
1189   std::string::const_iterator start_piece = mystrsep(line, iter);
1190   while (start_piece != line.end()) {
1191     switch (i) {
1192       case 0: {
1193         np++;
1194         break;
1195       }
1196       case 1: {
1197         numaliasm = atoi(std::string(start_piece, iter).c_str());
1198         if (numaliasm < 1) {
1199           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
1200                            af->getlinenum());
1201           return false;
1202         }
1203         aliasm = (char**)malloc(numaliasm * sizeof(char*));
1204         if (!aliasm) {
1205           numaliasm = 0;
1206           return false;
1207         }
1208         np++;
1209         break;
1210       }
1211       default:
1212         break;
1213     }
1214     ++i;
1215     start_piece = mystrsep(line, iter);
1216   }
1217   if (np != 2) {
1218     numaliasm = 0;
1219     free(aliasm);
1220     aliasm = NULL;
1221     HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
1222                      af->getlinenum());
1223     return false;
1224   }
1225 
1226   /* now parse the numaliasm lines to read in the remainder of the table */
1227   for (int j = 0; j < numaliasm; j++) {
1228     std::string nl;
1229     if (!af->getline(nl))
1230       return false;
1231     mychomp(nl);
1232     aliasm[j] = NULL;
1233     iter = nl.begin();
1234     i = 0;
1235     start_piece = mystrsep(nl, iter);
1236     while (start_piece != nl.end()) {
1237       switch (i) {
1238         case 0: {
1239           if (nl.compare(start_piece - nl.begin(), 2, "AM", 2) != 0) {
1240             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
1241                              af->getlinenum());
1242             numaliasm = 0;
1243             free(aliasm);
1244             aliasm = NULL;
1245             return false;
1246           }
1247           break;
1248         }
1249         case 1: {
1250           // add the remaining of the line
1251           std::string::const_iterator end = nl.end();
1252           std::string chunk(start_piece, end);
1253           if (complexprefixes) {
1254             if (utf8)
1255               reverseword_utf(chunk);
1256             else
1257               reverseword(chunk);
1258           }
1259           aliasm[j] = mystrdup(chunk.c_str());
1260           break;
1261         }
1262         default:
1263           break;
1264       }
1265       ++i;
1266       start_piece = mystrsep(nl, iter);
1267     }
1268     if (!aliasm[j]) {
1269       numaliasm = 0;
1270       free(aliasm);
1271       aliasm = NULL;
1272       HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
1273                        af->getlinenum());
1274       return false;
1275     }
1276   }
1277   return true;
1278 }
1279 
is_aliasm() const1280 int HashMgr::is_aliasm() const {
1281   return (aliasm != NULL);
1282 }
1283 
get_aliasm(int index) const1284 char* HashMgr::get_aliasm(int index) const {
1285   if ((index > 0) && (index <= numaliasm))
1286     return aliasm[index - 1];
1287   HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);
1288   return NULL;
1289 }
1290 
1291 /* parse in the typical fault correcting table */
parse_reptable(const std::string & line,FileMgr * af)1292 bool HashMgr::parse_reptable(const std::string& line, FileMgr* af) {
1293   if (!reptable.empty()) {
1294     HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
1295                      af->getlinenum());
1296     return false;
1297   }
1298   int numrep = -1;
1299   int i = 0;
1300   int np = 0;
1301   std::string::const_iterator iter = line.begin();
1302   std::string::const_iterator start_piece = mystrsep(line, iter);
1303   while (start_piece != line.end()) {
1304     switch (i) {
1305       case 0: {
1306         np++;
1307         break;
1308       }
1309       case 1: {
1310         numrep = atoi(std::string(start_piece, iter).c_str());
1311         if (numrep < 1) {
1312           HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n",
1313                            af->getlinenum());
1314           return false;
1315         }
1316         reptable.reserve(numrep);
1317         np++;
1318         break;
1319       }
1320       default:
1321         break;
1322     }
1323     ++i;
1324     start_piece = mystrsep(line, iter);
1325   }
1326   if (np != 2) {
1327     HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
1328                      af->getlinenum());
1329     return false;
1330   }
1331 
1332   /* now parse the numrep lines to read in the remainder of the table */
1333   for (int j = 0; j < numrep; ++j) {
1334     std::string nl;
1335     if (!af->getline(nl))
1336       return false;
1337     mychomp(nl);
1338     reptable.push_back(replentry());
1339     iter = nl.begin();
1340     i = 0;
1341     int type = 0;
1342     start_piece = mystrsep(nl, iter);
1343     while (start_piece != nl.end()) {
1344       switch (i) {
1345         case 0: {
1346           if (nl.compare(start_piece - nl.begin(), 3, "REP", 3) != 0) {
1347             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
1348                              af->getlinenum());
1349             reptable.clear();
1350             return false;
1351           }
1352           break;
1353         }
1354         case 1: {
1355           if (*start_piece == '^')
1356             type = 1;
1357           reptable.back().pattern.assign(start_piece + type, iter);
1358           mystrrep(reptable.back().pattern, "_", " ");
1359           if (!reptable.back().pattern.empty() && reptable.back().pattern[reptable.back().pattern.size() - 1] == '$') {
1360             type += 2;
1361             reptable.back().pattern.resize(reptable.back().pattern.size() - 1);
1362           }
1363           break;
1364         }
1365         case 2: {
1366           reptable.back().outstrings[type].assign(start_piece, iter);
1367           mystrrep(reptable.back().outstrings[type], "_", " ");
1368           break;
1369         }
1370         default:
1371           break;
1372       }
1373       ++i;
1374       start_piece = mystrsep(nl, iter);
1375     }
1376     if (reptable.back().pattern.empty() || reptable.back().outstrings[type].empty()) {
1377       HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
1378                        af->getlinenum());
1379       reptable.clear();
1380       return false;
1381     }
1382   }
1383   return true;
1384 }
1385 
1386 // return replacing table
get_reptable() const1387 const std::vector<replentry>& HashMgr::get_reptable() const {
1388   return reptable;
1389 }
1390