1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * Copyright (C) 2002-2017 Németh László
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17 *
18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23 *
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
35 *
36 * ***** END LICENSE BLOCK ***** */
37 /*
38 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39 * And Contributors. All rights reserved.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 *
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 *
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 *
52 * 3. All modifications to the source code must be clearly marked as
53 * such. Binary redistributions based on modified source code
54 * must be clearly marked as modified versions in the documentation
55 * and/or other materials provided with the distribution.
56 *
57 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
61 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 */
70
71 #include <stdlib.h>
72 #include <string.h>
73 #include <stdio.h>
74 #include <ctype.h>
75 #include <limits>
76 #include <sstream>
77
78 #include "hashmgr.hxx"
79 #include "csutil.hxx"
80 #include "atypes.hxx"
81 #include "langnum.hxx"
82
83 // build a hash table from a munched word list
84
HashMgr(const char * tpath,const char * apath,const char * key)85 HashMgr::HashMgr(const char* tpath, const char* apath, const char* key)
86 : tablesize(0),
87 tableptr(NULL),
88 flag_mode(FLAG_CHAR),
89 complexprefixes(0),
90 utf8(0),
91 forbiddenword(FORBIDDENWORD) // forbidden word signing flag
92 ,
93 numaliasf(0),
94 aliasf(NULL),
95 aliasflen(0),
96 numaliasm(0),
97 aliasm(NULL) {
98 langnum = 0;
99 csconv = 0;
100 load_config(apath, key);
101 int ec = load_tables(tpath, key);
102 if (ec) {
103 /* error condition - what should we do here */
104 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n", ec);
105 free(tableptr);
106 //keep tablesize to 1 to fix possible division with zero
107 tablesize = 1;
108 tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*));
109 if (!tableptr) {
110 tablesize = 0;
111 }
112 }
113 }
114
~HashMgr()115 HashMgr::~HashMgr() {
116 if (tableptr) {
117 // now pass through hash table freeing up everything
118 // go through column by column of the table
119 for (int i = 0; i < tablesize; i++) {
120 struct hentry* pt = tableptr[i];
121 struct hentry* nt = NULL;
122 while (pt) {
123 nt = pt->next;
124 if (pt->astr &&
125 (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen)))
126 free(pt->astr);
127 free(pt);
128 pt = nt;
129 }
130 }
131 free(tableptr);
132 }
133 tablesize = 0;
134
135 if (aliasf) {
136 for (int j = 0; j < (numaliasf); j++)
137 free(aliasf[j]);
138 free(aliasf);
139 aliasf = NULL;
140 if (aliasflen) {
141 free(aliasflen);
142 aliasflen = NULL;
143 }
144 }
145 if (aliasm) {
146 for (int j = 0; j < (numaliasm); j++)
147 free(aliasm[j]);
148 free(aliasm);
149 aliasm = NULL;
150 }
151
152 #ifndef OPENOFFICEORG
153 #ifndef MOZILLA_CLIENT
154 if (utf8)
155 free_utf_tbl();
156 #endif
157 #endif
158
159 #ifdef MOZILLA_CLIENT
160 delete[] csconv;
161 #endif
162 }
163
164 // lookup a root word in the hashtable
165
lookup(const char * word) const166 struct hentry* HashMgr::lookup(const char* word) const {
167 struct hentry* dp;
168 if (tableptr) {
169 dp = tableptr[hash(word)];
170 if (!dp)
171 return NULL;
172 for (; dp != NULL; dp = dp->next) {
173 if (strcmp(word, dp->word) == 0)
174 return dp;
175 }
176 }
177 return NULL;
178 }
179
180 // add a word to the hash table (private)
add_word(const std::string & in_word,int wcl,unsigned short * aff,int al,const std::string * in_desc,bool onlyupcase,int captype)181 int HashMgr::add_word(const std::string& in_word,
182 int wcl,
183 unsigned short* aff,
184 int al,
185 const std::string* in_desc,
186 bool onlyupcase,
187 int captype) {
188 const std::string* word = &in_word;
189 const std::string* desc = in_desc;
190
191 std::string *word_copy = NULL;
192 std::string *desc_copy = NULL;
193 if ((!ignorechars.empty() && !has_no_ignored_chars(in_word, ignorechars)) || complexprefixes) {
194 word_copy = new std::string(in_word);
195
196 if (!ignorechars.empty()) {
197 if (utf8) {
198 wcl = remove_ignored_chars_utf(*word_copy, ignorechars_utf16);
199 } else {
200 remove_ignored_chars(*word_copy, ignorechars);
201 }
202 }
203
204 if (complexprefixes) {
205 if (utf8)
206 wcl = reverseword_utf(*word_copy);
207 else
208 reverseword(*word_copy);
209
210 if (in_desc && !aliasm) {
211 desc_copy = new std::string(*in_desc);
212
213 if (complexprefixes) {
214 if (utf8)
215 reverseword_utf(*desc_copy);
216 else
217 reverseword(*desc_copy);
218 }
219 desc = desc_copy;
220 }
221 }
222
223 word = word_copy;
224 }
225
226 bool upcasehomonym = false;
227 int descl = desc ? (aliasm ? sizeof(char*) : desc->size() + 1) : 0;
228 // variable-length hash record with word and optional fields
229 struct hentry* hp =
230 (struct hentry*)malloc(sizeof(struct hentry) + word->size() + descl);
231 if (!hp) {
232 delete desc_copy;
233 delete word_copy;
234 return 1;
235 }
236
237 char* hpw = hp->word;
238 strcpy(hpw, word->c_str());
239
240 int i = hash(hpw);
241
242 hp->blen = (unsigned char)word->size();
243 hp->clen = (unsigned char)wcl;
244 hp->alen = (short)al;
245 hp->astr = aff;
246 hp->next = NULL;
247 hp->next_homonym = NULL;
248 hp->var = (captype == INITCAP) ? H_OPT_INITCAP : 0;
249
250 // store the description string or its pointer
251 if (desc) {
252 hp->var += H_OPT;
253 if (aliasm) {
254 hp->var += H_OPT_ALIASM;
255 store_pointer(hpw + word->size() + 1, get_aliasm(atoi(desc->c_str())));
256 } else {
257 strcpy(hpw + word->size() + 1, desc->c_str());
258 }
259 if (strstr(HENTRY_DATA(hp), MORPH_PHON)) {
260 hp->var += H_OPT_PHON;
261 // store ph: fields (pronounciation, misspellings, old orthography etc.)
262 // of a morphological description in reptable to use in REP replacements.
263 if (reptable.capacity() < (unsigned int)(tablesize/MORPH_PHON_RATIO))
264 reptable.reserve(tablesize/MORPH_PHON_RATIO);
265 std::string fields = HENTRY_DATA(hp);
266 std::string::const_iterator iter = fields.begin();
267 std::string::const_iterator start_piece = mystrsep(fields, iter);
268 while (start_piece != fields.end()) {
269 if (std::string(start_piece, iter).find(MORPH_PHON) == 0) {
270 std::string ph = std::string(start_piece, iter).substr(sizeof MORPH_PHON - 1);
271 if (ph.size() > 0) {
272 std::vector<w_char> w;
273 size_t strippatt;
274 std::string wordpart;
275 // dictionary based REP replacement, separated by "->"
276 // for example "pretty ph:prity ph:priti->pretti" to handle
277 // both prity -> pretty and pritier -> prettiest suggestions.
278 if (((strippatt = ph.find("->")) != std::string::npos) &&
279 (strippatt > 0) && (strippatt < ph.size() - 2)) {
280 wordpart = ph.substr(strippatt + 2);
281 ph.erase(ph.begin() + strippatt, ph.end());
282 } else
283 wordpart = in_word;
284 // when the ph: field ends with the character *,
285 // strip last character of the pattern and the replacement
286 // to match in REP suggestions also at character changes,
287 // for example, "pretty ph:prity*" results "prit->prett"
288 // REP replacement instead of "prity->pretty", to get
289 // prity->pretty and pritiest->prettiest suggestions.
290 if (ph.at(ph.size()-1) == '*') {
291 strippatt = 1;
292 size_t stripword = 0;
293 if (utf8) {
294 while ((strippatt < ph.size()) &&
295 ((ph.at(ph.size()-strippatt-1) & 0xc0) == 0x80))
296 ++strippatt;
297 while ((stripword < wordpart.size()) &&
298 ((wordpart.at(wordpart.size()-stripword-1) & 0xc0) == 0x80))
299 ++stripword;
300 }
301 ++strippatt;
302 ++stripword;
303 if ((ph.size() > strippatt) && (wordpart.size() > stripword)) {
304 ph.erase(ph.size()-strippatt, strippatt);
305 wordpart.erase(in_word.size()-stripword, stripword);
306 }
307 }
308 // capitalize lowercase pattern for capitalized words to support
309 // good suggestions also for capitalized misspellings, eg.
310 // Wednesday ph:wendsay
311 // results wendsay -> Wednesday and Wendsay -> Wednesday, too.
312 if (captype==INITCAP) {
313 std::string ph_capitalized;
314 if (utf8) {
315 u8_u16(w, ph);
316 if (get_captype_utf8(w, langnum) == NOCAP) {
317 mkinitcap_utf(w, langnum);
318 u16_u8(ph_capitalized, w);
319 }
320 } else if (get_captype(ph, csconv) == NOCAP)
321 mkinitcap(ph_capitalized, csconv);
322
323 if (ph_capitalized.size() > 0) {
324 // add also lowercase word in the case of German or
325 // Hungarian to support lowercase suggestions lowercased by
326 // compound word generation or derivational suffixes
327 // (for example by adjectival suffix "-i" of geographical
328 // names in Hungarian:
329 // Massachusetts ph:messzecsuzec
330 // messzecsuzeci -> massachusettsi (adjective)
331 // For lowercasing by conditional PFX rules, see
332 // tests/germancompounding test example or the
333 // Hungarian dictionary.)
334 if (langnum == LANG_de || langnum == LANG_hu) {
335 std::string wordpart_lower(wordpart);
336 if (utf8) {
337 u8_u16(w, wordpart_lower);
338 mkallsmall_utf(w, langnum);
339 u16_u8(wordpart_lower, w);
340 } else {
341 mkallsmall(wordpart_lower, csconv);
342 }
343 reptable.push_back(replentry());
344 reptable.back().pattern.assign(ph);
345 reptable.back().outstrings[0].assign(wordpart_lower);
346 }
347 reptable.push_back(replentry());
348 reptable.back().pattern.assign(ph_capitalized);
349 reptable.back().outstrings[0].assign(wordpart);
350 }
351 }
352 reptable.push_back(replentry());
353 reptable.back().pattern.assign(ph);
354 reptable.back().outstrings[0].assign(wordpart);
355 }
356 }
357 start_piece = mystrsep(fields, iter);
358 }
359 }
360 }
361
362 struct hentry* dp = tableptr[i];
363 if (!dp) {
364 tableptr[i] = hp;
365 delete desc_copy;
366 delete word_copy;
367 return 0;
368 }
369 while (dp->next != NULL) {
370 if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
371 // remove hidden onlyupcase homonym
372 if (!onlyupcase) {
373 if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
374 free(dp->astr);
375 dp->astr = hp->astr;
376 dp->alen = hp->alen;
377 free(hp);
378 delete desc_copy;
379 delete word_copy;
380 return 0;
381 } else {
382 dp->next_homonym = hp;
383 }
384 } else {
385 upcasehomonym = true;
386 }
387 }
388 dp = dp->next;
389 }
390 if (strcmp(hp->word, dp->word) == 0) {
391 // remove hidden onlyupcase homonym
392 if (!onlyupcase) {
393 if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
394 free(dp->astr);
395 dp->astr = hp->astr;
396 dp->alen = hp->alen;
397 free(hp);
398 delete desc_copy;
399 delete word_copy;
400 return 0;
401 } else {
402 dp->next_homonym = hp;
403 }
404 } else {
405 upcasehomonym = true;
406 }
407 }
408 if (!upcasehomonym) {
409 dp->next = hp;
410 } else {
411 // remove hidden onlyupcase homonym
412 if (hp->astr)
413 free(hp->astr);
414 free(hp);
415 }
416
417 delete desc_copy;
418 delete word_copy;
419 return 0;
420 }
421
add_hidden_capitalized_word(const std::string & word,int wcl,unsigned short * flags,int flagslen,const std::string * dp,int captype)422 int HashMgr::add_hidden_capitalized_word(const std::string& word,
423 int wcl,
424 unsigned short* flags,
425 int flagslen,
426 const std::string* dp,
427 int captype) {
428 if (flags == NULL)
429 flagslen = 0;
430
431 // add inner capitalized forms to handle the following allcap forms:
432 // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
433 // Allcaps with suffixes: CIA's -> CIA'S
434 if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
435 ((captype == ALLCAP) && (flagslen != 0))) &&
436 !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) {
437 unsigned short* flags2 =
438 (unsigned short*)malloc(sizeof(unsigned short) * (flagslen + 1));
439 if (!flags2)
440 return 1;
441 if (flagslen)
442 memcpy(flags2, flags, flagslen * sizeof(unsigned short));
443 flags2[flagslen] = ONLYUPCASEFLAG;
444 if (utf8) {
445 std::string st;
446 std::vector<w_char> w;
447 u8_u16(w, word);
448 mkallsmall_utf(w, langnum);
449 mkinitcap_utf(w, langnum);
450 u16_u8(st, w);
451 return add_word(st, wcl, flags2, flagslen + 1, dp, true, INITCAP);
452 } else {
453 std::string new_word(word);
454 mkallsmall(new_word, csconv);
455 mkinitcap(new_word, csconv);
456 int ret = add_word(new_word, wcl, flags2, flagslen + 1, dp, true, INITCAP);
457 return ret;
458 }
459 }
460 return 0;
461 }
462
463 // detect captype and modify word length for UTF-8 encoding
get_clen_and_captype(const std::string & word,int * captype,std::vector<w_char> & workbuf)464 int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) {
465 int len;
466 if (utf8) {
467 len = u8_u16(workbuf, word);
468 *captype = get_captype_utf8(workbuf, langnum);
469 } else {
470 len = word.size();
471 *captype = get_captype(word, csconv);
472 }
473 return len;
474 }
475
get_clen_and_captype(const std::string & word,int * captype)476 int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
477 std::vector<w_char> workbuf;
478 return get_clen_and_captype(word, captype, workbuf);
479 }
480
481 // remove word (personal dictionary function for standalone applications)
remove(const std::string & word)482 int HashMgr::remove(const std::string& word) {
483 struct hentry* dp = lookup(word.c_str());
484 while (dp) {
485 if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
486 unsigned short* flags =
487 (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen + 1));
488 if (!flags)
489 return 1;
490 for (int i = 0; i < dp->alen; i++)
491 flags[i] = dp->astr[i];
492 flags[dp->alen] = forbiddenword;
493 free(dp->astr);
494 dp->astr = flags;
495 dp->alen++;
496 std::sort(flags, flags + dp->alen);
497 }
498 dp = dp->next_homonym;
499 }
500 return 0;
501 }
502
503 /* remove forbidden flag to add a personal word to the hash */
remove_forbidden_flag(const std::string & word)504 int HashMgr::remove_forbidden_flag(const std::string& word) {
505 struct hentry* dp = lookup(word.c_str());
506 if (!dp)
507 return 1;
508 while (dp) {
509 if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen))
510 dp->alen = 0; // XXX forbidden words of personal dic.
511 dp = dp->next_homonym;
512 }
513 return 0;
514 }
515
516 // add a custom dic. word to the hash table (public)
add(const std::string & word)517 int HashMgr::add(const std::string& word) {
518 if (remove_forbidden_flag(word)) {
519 int captype;
520 int al = 0;
521 unsigned short* flags = NULL;
522 int wcl = get_clen_and_captype(word, &captype);
523 add_word(word, wcl, flags, al, NULL, false, captype);
524 return add_hidden_capitalized_word(word, wcl, flags, al, NULL,
525 captype);
526 }
527 return 0;
528 }
529
add_with_affix(const std::string & word,const std::string & example)530 int HashMgr::add_with_affix(const std::string& word, const std::string& example) {
531 // detect captype and modify word length for UTF-8 encoding
532 struct hentry* dp = lookup(example.c_str());
533 remove_forbidden_flag(word);
534 if (dp && dp->astr) {
535 int captype;
536 int wcl = get_clen_and_captype(word, &captype);
537 if (aliasf) {
538 add_word(word, wcl, dp->astr, dp->alen, NULL, false, captype);
539 } else {
540 unsigned short* flags =
541 (unsigned short*)malloc(dp->alen * sizeof(unsigned short));
542 if (flags) {
543 memcpy((void*)flags, (void*)dp->astr,
544 dp->alen * sizeof(unsigned short));
545 add_word(word, wcl, flags, dp->alen, NULL, false, captype);
546 } else
547 return 1;
548 }
549 return add_hidden_capitalized_word(word, wcl, dp->astr,
550 dp->alen, NULL, captype);
551 }
552 return 1;
553 }
554
// walk the hash table entry by entry - null at end
// initialize: col=-1; hp = NULL; hp = walk_hashtable(col, hp);
walk_hashtable(int & col,struct hentry * hp) const557 struct hentry* HashMgr::walk_hashtable(int& col, struct hentry* hp) const {
558 if (hp && hp->next != NULL)
559 return hp->next;
560 for (col++; col < tablesize; col++) {
561 if (tableptr[col])
562 return tableptr[col];
563 }
564 // null at end and reset to start
565 col = -1;
566 return NULL;
567 }
568
569 // load a munched word list and build a hash table on the fly
load_tables(const char * tpath,const char * key)570 int HashMgr::load_tables(const char* tpath, const char* key) {
571 // open dictionary file
572 FileMgr* dict = new FileMgr(tpath, key);
573 if (dict == NULL)
574 return 1;
575
576 // first read the first line of file to get hash table size */
577 std::string ts;
578 if (!dict->getline(ts)) {
579 HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath);
580 delete dict;
581 return 2;
582 }
583 mychomp(ts);
584
585 /* remove byte order mark */
586 if (ts.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {
587 ts.erase(0, 3);
588 }
589
590 tablesize = atoi(ts.c_str());
591
592 int nExtra = 5 + USERWORD;
593
594 if (tablesize <= 0 ||
595 (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) /
596 int(sizeof(struct hentry*)))) {
597 HUNSPELL_WARNING(
598 stderr, "error: line 1: missing or bad word count in the dic file\n");
599 delete dict;
600 return 4;
601 }
602 tablesize += nExtra;
603 if ((tablesize % 2) == 0)
604 tablesize++;
605
606 // allocate the hash table
607 tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*));
608 if (!tableptr) {
609 delete dict;
610 return 3;
611 }
612
613 // loop through all words on much list and add to hash
614 // table and create word and affix strings
615
616 std::vector<w_char> workbuf;
617
618 while (dict->getline(ts)) {
619 mychomp(ts);
620 // split each line into word and morphological description
621 size_t dp_pos = 0;
622 while ((dp_pos = ts.find(':', dp_pos)) != std::string::npos) {
623 if ((dp_pos > 3) && (ts[dp_pos - 3] == ' ' || ts[dp_pos - 3] == '\t')) {
624 for (dp_pos -= 3; dp_pos > 0 && (ts[dp_pos-1] == ' ' || ts[dp_pos-1] == '\t'); --dp_pos)
625 ;
626 if (dp_pos == 0) { // missing word
627 dp_pos = std::string::npos;
628 } else {
629 ++dp_pos;
630 }
631 break;
632 }
633 ++dp_pos;
634 }
635
636 // tabulator is the old morphological field separator
637 size_t dp2_pos = ts.find('\t');
638 if (dp2_pos != std::string::npos && (dp_pos == std::string::npos || dp2_pos < dp_pos)) {
639 dp_pos = dp2_pos + 1;
640 }
641
642 std::string dp;
643 if (dp_pos != std::string::npos) {
644 dp.assign(ts.substr(dp_pos));
645 ts.resize(dp_pos - 1);
646 }
647
648 // split each line into word and affix char strings
649 // "\/" signs slash in words (not affix separator)
650 // "/" at beginning of the line is word character (not affix separator)
651 size_t ap_pos = ts.find('/');
652 while (ap_pos != std::string::npos) {
653 if (ap_pos == 0) {
654 ++ap_pos;
655 continue;
656 } else if (ts[ap_pos - 1] != '\\')
657 break;
658 // replace "\/" with "/"
659 ts.erase(ap_pos - 1, 1);
660 ap_pos = ts.find('/', ap_pos);
661 }
662
663 unsigned short* flags;
664 int al;
665 if (ap_pos != std::string::npos && ap_pos != ts.size()) {
666 std::string ap(ts.substr(ap_pos + 1));
667 ts.resize(ap_pos);
668 if (aliasf) {
669 int index = atoi(ap.c_str());
670 al = get_aliasf(index, &flags, dict);
671 if (!al) {
672 HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n",
673 dict->getlinenum());
674 }
675 } else {
676 al = decode_flags(&flags, ap.c_str(), dict);
677 if (al == -1) {
678 HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
679 delete dict;
680 return 6;
681 }
682 std::sort(flags, flags + al);
683 }
684 } else {
685 al = 0;
686 flags = NULL;
687 }
688
689 int captype;
690 int wcl = get_clen_and_captype(ts, &captype, workbuf);
691 const std::string *dp_str = dp.empty() ? NULL : &dp;
692 // add the word and its index plus its capitalized form optionally
693 if (add_word(ts, wcl, flags, al, dp_str, false, captype) ||
694 add_hidden_capitalized_word(ts, wcl, flags, al, dp_str, captype)) {
695 delete dict;
696 return 5;
697 }
698 }
699
700 delete dict;
701 return 0;
702 }
703
704 // the hash function is a simple load and rotate
705 // algorithm borrowed
hash(const char * word) const706 int HashMgr::hash(const char* word) const {
707 unsigned long hv = 0;
708 for (int i = 0; i < 4 && *word != 0; i++)
709 hv = (hv << 8) | (*word++);
710 while (*word != 0) {
711 ROTATE(hv, ROTATE_LEN);
712 hv ^= (*word++);
713 }
714 return (unsigned long)hv % tablesize;
715 }
716
decode_flags(unsigned short ** result,const std::string & flags,FileMgr * af) const717 int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const {
718 int len;
719 if (flags.empty()) {
720 *result = NULL;
721 return 0;
722 }
723 switch (flag_mode) {
724 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
725 len = flags.size();
726 if (len % 2 == 1)
727 HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",
728 af->getlinenum());
729 len /= 2;
730 *result = (unsigned short*)malloc(len * sizeof(unsigned short));
731 if (!*result)
732 return -1;
733 for (int i = 0; i < len; i++) {
734 (*result)[i] = ((unsigned short)((unsigned char)flags[i * 2]) << 8) +
735 (unsigned char)flags[i * 2 + 1];
736 }
737 break;
738 }
739 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521
740 // 23 233)
741 len = 1;
742 unsigned short* dest;
743 for (size_t i = 0; i < flags.size(); ++i) {
744 if (flags[i] == ',')
745 len++;
746 }
747 *result = (unsigned short*)malloc(len * sizeof(unsigned short));
748 if (!*result)
749 return -1;
750 dest = *result;
751 const char* src = flags.c_str();
752 for (const char* p = src; *p; p++) {
753 if (*p == ',') {
754 int i = atoi(src);
755 if (i >= DEFAULTFLAGS)
756 HUNSPELL_WARNING(
757 stderr, "error: line %d: flag id %d is too large (max: %d)\n",
758 af->getlinenum(), i, DEFAULTFLAGS - 1);
759 *dest = (unsigned short)i;
760 if (*dest == 0)
761 HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
762 af->getlinenum());
763 src = p + 1;
764 dest++;
765 }
766 }
767 int i = atoi(src);
768 if (i >= DEFAULTFLAGS)
769 HUNSPELL_WARNING(stderr,
770 "error: line %d: flag id %d is too large (max: %d)\n",
771 af->getlinenum(), i, DEFAULTFLAGS - 1);
772 *dest = (unsigned short)i;
773 if (*dest == 0)
774 HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
775 af->getlinenum());
776 break;
777 }
778 case FLAG_UNI: { // UTF-8 characters
779 std::vector<w_char> w;
780 u8_u16(w, flags);
781 len = w.size();
782 *result = (unsigned short*)malloc(len * sizeof(unsigned short));
783 if (!*result)
784 return -1;
785 memcpy(*result, &w[0], len * sizeof(short));
786 break;
787 }
788 default: { // Ispell's one-character flags (erfg -> e r f g)
789 unsigned short* dest;
790 len = flags.size();
791 *result = (unsigned short*)malloc(len * sizeof(unsigned short));
792 if (!*result)
793 return -1;
794 dest = *result;
795 for (size_t i = 0; i < flags.size(); ++i) {
796 *dest = (unsigned char)flags[i];
797 dest++;
798 }
799 }
800 }
801 return len;
802 }
803
decode_flags(std::vector<unsigned short> & result,const std::string & flags,FileMgr * af) const804 bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const {
805 if (flags.empty()) {
806 return false;
807 }
808 switch (flag_mode) {
809 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
810 size_t len = flags.size();
811 if (len % 2 == 1)
812 HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",
813 af->getlinenum());
814 len /= 2;
815 result.reserve(result.size() + len);
816 for (size_t i = 0; i < len; ++i) {
817 result.push_back(((unsigned short)((unsigned char)flags[i * 2]) << 8) +
818 (unsigned char)flags[i * 2 + 1]);
819 }
820 break;
821 }
822 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521
823 // 23 233)
824 const char* src = flags.c_str();
825 for (const char* p = src; *p; p++) {
826 if (*p == ',') {
827 int i = atoi(src);
828 if (i >= DEFAULTFLAGS)
829 HUNSPELL_WARNING(
830 stderr, "error: line %d: flag id %d is too large (max: %d)\n",
831 af->getlinenum(), i, DEFAULTFLAGS - 1);
832 result.push_back((unsigned short)i);
833 if (result.back() == 0)
834 HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
835 af->getlinenum());
836 src = p + 1;
837 }
838 }
839 int i = atoi(src);
840 if (i >= DEFAULTFLAGS)
841 HUNSPELL_WARNING(stderr,
842 "error: line %d: flag id %d is too large (max: %d)\n",
843 af->getlinenum(), i, DEFAULTFLAGS - 1);
844 result.push_back((unsigned short)i);
845 if (result.back() == 0)
846 HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
847 af->getlinenum());
848 break;
849 }
850 case FLAG_UNI: { // UTF-8 characters
851 std::vector<w_char> w;
852 u8_u16(w, flags);
853 size_t len = w.size();
854 size_t origsize = result.size();
855 result.resize(origsize + len);
856 memcpy(&result[origsize], &w[0], len * sizeof(short));
857 break;
858 }
859 default: { // Ispell's one-character flags (erfg -> e r f g)
860 result.reserve(flags.size());
861 for (size_t i = 0; i < flags.size(); ++i) {
862 result.push_back((unsigned char)flags[i]);
863 }
864 }
865 }
866 return true;
867 }
868
decode_flag(const char * f) const869 unsigned short HashMgr::decode_flag(const char* f) const {
870 unsigned short s = 0;
871 int i;
872 switch (flag_mode) {
873 case FLAG_LONG:
874 s = ((unsigned short)((unsigned char)f[0]) << 8) + (unsigned char)f[1];
875 break;
876 case FLAG_NUM:
877 i = atoi(f);
878 if (i >= DEFAULTFLAGS)
879 HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n",
880 i, DEFAULTFLAGS - 1);
881 s = (unsigned short)i;
882 break;
883 case FLAG_UNI: {
884 std::vector<w_char> w;
885 u8_u16(w, f);
886 if (!w.empty())
887 memcpy(&s, &w[0], 1 * sizeof(short));
888 break;
889 }
890 default:
891 s = *(unsigned char*)f;
892 }
893 if (s == 0)
894 HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
895 return s;
896 }
897
encode_flag(unsigned short f) const898 char* HashMgr::encode_flag(unsigned short f) const {
899 if (f == 0)
900 return mystrdup("(NULL)");
901 std::string ch;
902 if (flag_mode == FLAG_LONG) {
903 ch.push_back((unsigned char)(f >> 8));
904 ch.push_back((unsigned char)(f - ((f >> 8) << 8)));
905 } else if (flag_mode == FLAG_NUM) {
906 std::ostringstream stream;
907 stream << f;
908 ch = stream.str();
909 } else if (flag_mode == FLAG_UNI) {
910 const w_char* w_c = (const w_char*)&f;
911 std::vector<w_char> w(w_c, w_c + 1);
912 u16_u8(ch, w);
913 } else {
914 ch.push_back((unsigned char)(f));
915 }
916 return mystrdup(ch.c_str());
917 }
918
919 // read in aff file and set flag mode
load_config(const char * affpath,const char * key)920 int HashMgr::load_config(const char* affpath, const char* key) {
921 int firstline = 1;
922
923 // open the affix file
924 FileMgr* afflst = new FileMgr(affpath, key);
925 if (!afflst) {
926 HUNSPELL_WARNING(
927 stderr, "Error - could not open affix description file %s\n", affpath);
928 return 1;
929 }
930
931 // read in each line ignoring any that do not
932 // start with a known line type indicator
933
934 std::string line;
935 while (afflst->getline(line)) {
936 mychomp(line);
937
938 /* remove byte order mark */
939 if (firstline) {
940 firstline = 0;
941 if (line.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {
942 line.erase(0, 3);
943 }
944 }
945
946 /* parse in the try string */
947 if ((line.compare(0, 4, "FLAG", 4) == 0) && line.size() > 4 && isspace(line[4])) {
948 if (flag_mode != FLAG_CHAR) {
949 HUNSPELL_WARNING(stderr,
950 "error: line %d: multiple definitions of the FLAG "
951 "affix file parameter\n",
952 afflst->getlinenum());
953 }
954 if (line.find("long") != std::string::npos)
955 flag_mode = FLAG_LONG;
956 if (line.find("num") != std::string::npos)
957 flag_mode = FLAG_NUM;
958 if (line.find("UTF-8") != std::string::npos)
959 flag_mode = FLAG_UNI;
960 if (flag_mode == FLAG_CHAR) {
961 HUNSPELL_WARNING(
962 stderr,
963 "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n",
964 afflst->getlinenum());
965 }
966 }
967
968 if (line.compare(0, 13, "FORBIDDENWORD", 13) == 0) {
969 std::string st;
970 if (!parse_string(line, st, afflst->getlinenum())) {
971 delete afflst;
972 return 1;
973 }
974 forbiddenword = decode_flag(st.c_str());
975 }
976
977 if (line.compare(0, 3, "SET", 3) == 0) {
978 if (!parse_string(line, enc, afflst->getlinenum())) {
979 delete afflst;
980 return 1;
981 }
982 if (enc == "UTF-8") {
983 utf8 = 1;
984 #ifndef OPENOFFICEORG
985 #ifndef MOZILLA_CLIENT
986 initialize_utf_tbl();
987 #endif
988 #endif
989 } else
990 csconv = get_current_cs(enc);
991 }
992
993 if (line.compare(0, 4, "LANG", 4) == 0) {
994 if (!parse_string(line, lang, afflst->getlinenum())) {
995 delete afflst;
996 return 1;
997 }
998 langnum = get_lang_num(lang);
999 }
1000
1001 /* parse in the ignored characters (for example, Arabic optional diacritics
1002 * characters */
1003 if (line.compare(0, 6, "IGNORE", 6) == 0) {
1004 if (!parse_array(line, ignorechars, ignorechars_utf16,
1005 utf8, afflst->getlinenum())) {
1006 delete afflst;
1007 return 1;
1008 }
1009 }
1010
1011 if ((line.compare(0, 2, "AF", 2) == 0) && line.size() > 2 && isspace(line[2])) {
1012 if (!parse_aliasf(line, afflst)) {
1013 delete afflst;
1014 return 1;
1015 }
1016 }
1017
1018 if ((line.compare(0, 2, "AM", 2) == 0) && line.size() > 2 && isspace(line[2])) {
1019 if (!parse_aliasm(line, afflst)) {
1020 delete afflst;
1021 return 1;
1022 }
1023 }
1024
1025 if (line.compare(0, 15, "COMPLEXPREFIXES", 15) == 0)
1026 complexprefixes = 1;
1027
1028 /* parse in the typical fault correcting table */
1029 if (line.compare(0, 3, "REP", 3) == 0) {
1030 if (!parse_reptable(line, afflst)) {
1031 delete afflst;
1032 return 1;
1033 }
1034 }
1035
1036 // don't check the full affix file, yet
1037 if (((line.compare(0, 3, "SFX", 3) == 0) ||
1038 (line.compare(0, 3, "PFX", 3) == 0)) &&
1039 line.size() > 3 && isspace(line[3]) &&
1040 !reptable.empty()) // (REP table is in the end of Afrikaans aff file)
1041 break;
1042 }
1043
1044 if (csconv == NULL)
1045 csconv = get_current_cs(SPELL_ENCODING);
1046 delete afflst;
1047 return 0;
1048 }
1049
1050 /* parse in the ALIAS table */
parse_aliasf(const std::string & line,FileMgr * af)1051 bool HashMgr::parse_aliasf(const std::string& line, FileMgr* af) {
1052 if (numaliasf != 0) {
1053 HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
1054 af->getlinenum());
1055 return false;
1056 }
1057 int i = 0;
1058 int np = 0;
1059 std::string::const_iterator iter = line.begin();
1060 std::string::const_iterator start_piece = mystrsep(line, iter);
1061 while (start_piece != line.end()) {
1062 switch (i) {
1063 case 0: {
1064 np++;
1065 break;
1066 }
1067 case 1: {
1068 numaliasf = atoi(std::string(start_piece, iter).c_str());
1069 if (numaliasf < 1) {
1070 numaliasf = 0;
1071 aliasf = NULL;
1072 aliasflen = NULL;
1073 HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
1074 af->getlinenum());
1075 return false;
1076 }
1077 aliasf =
1078 (unsigned short**)malloc(numaliasf * sizeof(unsigned short*));
1079 aliasflen =
1080 (unsigned short*)malloc(numaliasf * sizeof(unsigned short));
1081 if (!aliasf || !aliasflen) {
1082 numaliasf = 0;
1083 if (aliasf)
1084 free(aliasf);
1085 if (aliasflen)
1086 free(aliasflen);
1087 aliasf = NULL;
1088 aliasflen = NULL;
1089 return false;
1090 }
1091 np++;
1092 break;
1093 }
1094 default:
1095 break;
1096 }
1097 ++i;
1098 start_piece = mystrsep(line, iter);
1099 }
1100 if (np != 2) {
1101 numaliasf = 0;
1102 free(aliasf);
1103 free(aliasflen);
1104 aliasf = NULL;
1105 aliasflen = NULL;
1106 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
1107 af->getlinenum());
1108 return false;
1109 }
1110
1111 /* now parse the numaliasf lines to read in the remainder of the table */
1112 for (int j = 0; j < numaliasf; j++) {
1113 std::string nl;
1114 if (!af->getline(nl))
1115 return false;
1116 mychomp(nl);
1117 i = 0;
1118 aliasf[j] = NULL;
1119 aliasflen[j] = 0;
1120 iter = nl.begin();
1121 start_piece = mystrsep(nl, iter);
1122 while (start_piece != nl.end()) {
1123 switch (i) {
1124 case 0: {
1125 if (nl.compare(start_piece - nl.begin(), 2, "AF", 2) != 0) {
1126 numaliasf = 0;
1127 free(aliasf);
1128 free(aliasflen);
1129 aliasf = NULL;
1130 aliasflen = NULL;
1131 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
1132 af->getlinenum());
1133 return false;
1134 }
1135 break;
1136 }
1137 case 1: {
1138 std::string piece(start_piece, iter);
1139 aliasflen[j] =
1140 (unsigned short)decode_flags(&(aliasf[j]), piece, af);
1141 std::sort(aliasf[j], aliasf[j] + aliasflen[j]);
1142 break;
1143 }
1144 default:
1145 break;
1146 }
1147 ++i;
1148 start_piece = mystrsep(nl, iter);
1149 }
1150 if (!aliasf[j]) {
1151 free(aliasf);
1152 free(aliasflen);
1153 aliasf = NULL;
1154 aliasflen = NULL;
1155 numaliasf = 0;
1156 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
1157 af->getlinenum());
1158 return false;
1159 }
1160 }
1161 return true;
1162 }
1163
is_aliasf() const1164 int HashMgr::is_aliasf() const {
1165 return (aliasf != NULL);
1166 }
1167
get_aliasf(int index,unsigned short ** fvec,FileMgr * af) const1168 int HashMgr::get_aliasf(int index, unsigned short** fvec, FileMgr* af) const {
1169 if ((index > 0) && (index <= numaliasf)) {
1170 *fvec = aliasf[index - 1];
1171 return aliasflen[index - 1];
1172 }
1173 HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n",
1174 af->getlinenum(), index);
1175 *fvec = NULL;
1176 return 0;
1177 }
1178
1179 /* parse morph alias definitions */
parse_aliasm(const std::string & line,FileMgr * af)1180 bool HashMgr::parse_aliasm(const std::string& line, FileMgr* af) {
1181 if (numaliasm != 0) {
1182 HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
1183 af->getlinenum());
1184 return false;
1185 }
1186 int i = 0;
1187 int np = 0;
1188 std::string::const_iterator iter = line.begin();
1189 std::string::const_iterator start_piece = mystrsep(line, iter);
1190 while (start_piece != line.end()) {
1191 switch (i) {
1192 case 0: {
1193 np++;
1194 break;
1195 }
1196 case 1: {
1197 numaliasm = atoi(std::string(start_piece, iter).c_str());
1198 if (numaliasm < 1) {
1199 HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
1200 af->getlinenum());
1201 return false;
1202 }
1203 aliasm = (char**)malloc(numaliasm * sizeof(char*));
1204 if (!aliasm) {
1205 numaliasm = 0;
1206 return false;
1207 }
1208 np++;
1209 break;
1210 }
1211 default:
1212 break;
1213 }
1214 ++i;
1215 start_piece = mystrsep(line, iter);
1216 }
1217 if (np != 2) {
1218 numaliasm = 0;
1219 free(aliasm);
1220 aliasm = NULL;
1221 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
1222 af->getlinenum());
1223 return false;
1224 }
1225
1226 /* now parse the numaliasm lines to read in the remainder of the table */
1227 for (int j = 0; j < numaliasm; j++) {
1228 std::string nl;
1229 if (!af->getline(nl))
1230 return false;
1231 mychomp(nl);
1232 aliasm[j] = NULL;
1233 iter = nl.begin();
1234 i = 0;
1235 start_piece = mystrsep(nl, iter);
1236 while (start_piece != nl.end()) {
1237 switch (i) {
1238 case 0: {
1239 if (nl.compare(start_piece - nl.begin(), 2, "AM", 2) != 0) {
1240 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
1241 af->getlinenum());
1242 numaliasm = 0;
1243 free(aliasm);
1244 aliasm = NULL;
1245 return false;
1246 }
1247 break;
1248 }
1249 case 1: {
1250 // add the remaining of the line
1251 std::string::const_iterator end = nl.end();
1252 std::string chunk(start_piece, end);
1253 if (complexprefixes) {
1254 if (utf8)
1255 reverseword_utf(chunk);
1256 else
1257 reverseword(chunk);
1258 }
1259 aliasm[j] = mystrdup(chunk.c_str());
1260 break;
1261 }
1262 default:
1263 break;
1264 }
1265 ++i;
1266 start_piece = mystrsep(nl, iter);
1267 }
1268 if (!aliasm[j]) {
1269 numaliasm = 0;
1270 free(aliasm);
1271 aliasm = NULL;
1272 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
1273 af->getlinenum());
1274 return false;
1275 }
1276 }
1277 return true;
1278 }
1279
is_aliasm() const1280 int HashMgr::is_aliasm() const {
1281 return (aliasm != NULL);
1282 }
1283
get_aliasm(int index) const1284 char* HashMgr::get_aliasm(int index) const {
1285 if ((index > 0) && (index <= numaliasm))
1286 return aliasm[index - 1];
1287 HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);
1288 return NULL;
1289 }
1290
1291 /* parse in the typical fault correcting table */
parse_reptable(const std::string & line,FileMgr * af)1292 bool HashMgr::parse_reptable(const std::string& line, FileMgr* af) {
1293 if (!reptable.empty()) {
1294 HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
1295 af->getlinenum());
1296 return false;
1297 }
1298 int numrep = -1;
1299 int i = 0;
1300 int np = 0;
1301 std::string::const_iterator iter = line.begin();
1302 std::string::const_iterator start_piece = mystrsep(line, iter);
1303 while (start_piece != line.end()) {
1304 switch (i) {
1305 case 0: {
1306 np++;
1307 break;
1308 }
1309 case 1: {
1310 numrep = atoi(std::string(start_piece, iter).c_str());
1311 if (numrep < 1) {
1312 HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n",
1313 af->getlinenum());
1314 return false;
1315 }
1316 reptable.reserve(numrep);
1317 np++;
1318 break;
1319 }
1320 default:
1321 break;
1322 }
1323 ++i;
1324 start_piece = mystrsep(line, iter);
1325 }
1326 if (np != 2) {
1327 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
1328 af->getlinenum());
1329 return false;
1330 }
1331
1332 /* now parse the numrep lines to read in the remainder of the table */
1333 for (int j = 0; j < numrep; ++j) {
1334 std::string nl;
1335 if (!af->getline(nl))
1336 return false;
1337 mychomp(nl);
1338 reptable.push_back(replentry());
1339 iter = nl.begin();
1340 i = 0;
1341 int type = 0;
1342 start_piece = mystrsep(nl, iter);
1343 while (start_piece != nl.end()) {
1344 switch (i) {
1345 case 0: {
1346 if (nl.compare(start_piece - nl.begin(), 3, "REP", 3) != 0) {
1347 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
1348 af->getlinenum());
1349 reptable.clear();
1350 return false;
1351 }
1352 break;
1353 }
1354 case 1: {
1355 if (*start_piece == '^')
1356 type = 1;
1357 reptable.back().pattern.assign(start_piece + type, iter);
1358 mystrrep(reptable.back().pattern, "_", " ");
1359 if (!reptable.back().pattern.empty() && reptable.back().pattern[reptable.back().pattern.size() - 1] == '$') {
1360 type += 2;
1361 reptable.back().pattern.resize(reptable.back().pattern.size() - 1);
1362 }
1363 break;
1364 }
1365 case 2: {
1366 reptable.back().outstrings[type].assign(start_piece, iter);
1367 mystrrep(reptable.back().outstrings[type], "_", " ");
1368 break;
1369 }
1370 default:
1371 break;
1372 }
1373 ++i;
1374 start_piece = mystrsep(nl, iter);
1375 }
1376 if (reptable.back().pattern.empty() || reptable.back().outstrings[type].empty()) {
1377 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
1378 af->getlinenum());
1379 reptable.clear();
1380 return false;
1381 }
1382 }
1383 return true;
1384 }
1385
1386 // return replacing table
get_reptable() const1387 const std::vector<replentry>& HashMgr::get_reptable() const {
1388 return reptable;
1389 }
1390