1 /* ***** BEGIN LICENSE BLOCK ***** 2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 3 * 4 * Copyright (C) 2002-2017 Németh László 5 * 6 * The contents of this file are subject to the Mozilla Public License Version 7 * 1.1 (the "License"); you may not use this file except in compliance with 8 * the License. You may obtain a copy of the License at 9 * http://www.mozilla.org/MPL/ 10 * 11 * Software distributed under the License is distributed on an "AS IS" basis, 12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 13 * for the specific language governing rights and limitations under the 14 * License. 15 * 16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. 17 * 18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, 19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, 20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, 21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, 22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen 23 * 24 * Alternatively, the contents of this file may be used under the terms of 25 * either the GNU General Public License Version 2 or later (the "GPL"), or 26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 * in which case the provisions of the GPL or the LGPL are applicable instead 28 * of those above. If you wish to allow use of your version of this file only 29 * under the terms of either the GPL or the LGPL, and not to allow others to 30 * use your version of this file under the terms of the MPL, indicate your 31 * decision by deleting the provisions above and replace them with the notice 32 * and other provisions required by the GPL or the LGPL. If you do not delete 33 * the provisions above, a recipient may use your version of this file under 34 * the terms of any one of the MPL, the GPL or the LGPL. 35 * 36 * ***** END LICENSE BLOCK ***** */ 37 /* 38 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada 39 * And Contributors. All rights reserved. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 48 * 2. Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 52 * 3. All modifications to the source code must be clearly marked as 53 * such. Binary redistributions based on modified source code 54 * must be clearly marked as modified versions in the documentation 55 * and/or other materials provided with the distribution. 56 * 57 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS 58 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 59 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 60 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 61 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 62 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 63 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 64 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 */ 70 71 #ifndef AFFIXMGR_HXX_ 72 #define AFFIXMGR_HXX_ 73 74 #include <stdio.h> 75 76 #include <string> 77 #include <vector> 78 79 #include "atypes.hxx" 80 #include "baseaffix.hxx" 81 #include "hashmgr.hxx" 82 #include "phonet.hxx" 83 #include "replist.hxx" 84 85 // check flag duplication 86 #define dupSFX (1 << 0) 87 #define dupPFX (1 << 1) 88 89 class PfxEntry; 90 class SfxEntry; 91 92 class AffixMgr { 93 PfxEntry* pStart[SETSIZE]; 94 SfxEntry* sStart[SETSIZE]; 95 PfxEntry* pFlag[SETSIZE]; 96 SfxEntry* sFlag[SETSIZE]; 97 const std::vector<HashMgr*>& alldic; 98 const HashMgr* pHMgr; 99 std::string keystring; 100 std::string trystring; 101 std::string encoding; 102 struct cs_info* csconv; 103 int utf8; 104 int complexprefixes; 105 FLAG compoundflag; 106 FLAG compoundbegin; 107 FLAG compoundmiddle; 108 FLAG compoundend; 109 FLAG compoundroot; 110 FLAG compoundforbidflag; 111 FLAG compoundpermitflag; 112 int compoundmoresuffixes; 113 int checkcompounddup; 114 int checkcompoundrep; 115 int checkcompoundcase; 116 int checkcompoundtriple; 117 int simplifiedtriple; 118 FLAG forbiddenword; 119 FLAG nosuggest; 120 FLAG nongramsuggest; 121 FLAG needaffix; 122 int cpdmin; 123 bool parsedrep; 124 std::vector<replentry> reptable; 125 RepList* iconvtable; 126 RepList* oconvtable; 127 bool parsedmaptable; 128 std::vector<mapentry> maptable; 129 bool parsedbreaktable; 130 std::vector<std::string> breaktable; 131 bool parsedcheckcpd; 132 std::vector<patentry> checkcpdtable; 133 int simplifiedcpd; 134 bool parseddefcpd; 135 std::vector<flagentry> defcpdtable; 136 phonetable* phone; 137 int maxngramsugs; 138 int maxcpdsugs; 139 int maxdiff; 140 int onlymaxdiff; 141 int nosplitsugs; 142 int sugswithdots; 143 int cpdwordmax; 144 int cpdmaxsyllable; 145 std::string cpdvowels; // vowels (for calculating of Hungarian compounding limit, 146 std::vector<w_char> cpdvowels_utf16; //vowels for UTF-8 encoding 147 std::string cpdsyllablenum; // syllable count incrementing flag 148 const char* pfxappnd; // BUG: not stateless 149 const char* sfxappnd; // BUG: not stateless 150 int sfxextra; // BUG: not stateless 151 FLAG sfxflag; // BUG: not stateless 152 char* derived; // BUG: not stateless 153 SfxEntry* sfx; // BUG: not stateless 154 PfxEntry* pfx; // BUG: not stateless 155 int checknum; 156 std::string wordchars; // letters + spec. word characters 157 std::vector<w_char> wordchars_utf16; 158 std::string ignorechars; // letters + spec. word characters 159 std::vector<w_char> ignorechars_utf16; 160 std::string version; // affix and dictionary file version string 161 std::string lang; // language 162 int langnum; 163 FLAG lemma_present; 164 FLAG circumfix; 165 FLAG onlyincompound; 166 FLAG keepcase; 167 FLAG forceucase; 168 FLAG warn; 169 int forbidwarn; 170 FLAG substandard; 171 int checksharps; 172 int fullstrip; 173 174 int havecontclass; // boolean variable 175 char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold 176 // affix) 177 178 public: 179 AffixMgr(const char* affpath, const std::vector<HashMgr*>& ptr, const char* key = NULL); 180 ~AffixMgr(); 181 struct hentry* affix_check(const char* word, 182 int len, 183 const unsigned short needflag = (unsigned short)0, 184 char in_compound = IN_CPD_NOT); 185 struct hentry* prefix_check(const char* word, 186 int len, 187 char in_compound, 188 const FLAG needflag = FLAG_NULL); 189 inline int isSubset(const char* s1, const char* s2); 190 struct hentry* prefix_check_twosfx(const char* word, 191 int len, 192 char in_compound, 193 const FLAG needflag = FLAG_NULL); 194 inline int isRevSubset(const char* s1, const char* end_of_s2, int len); 195 struct hentry* suffix_check(const char* word, 196 int len, 197 int sfxopts, 198 PfxEntry* ppfx, 199 const FLAG cclass = FLAG_NULL, 200 const FLAG needflag = FLAG_NULL, 201 char in_compound = IN_CPD_NOT); 202 struct hentry* suffix_check_twosfx(const char* word, 203 int len, 204 int sfxopts, 205 PfxEntry* ppfx, 206 const FLAG needflag = FLAG_NULL); 207 208 std::string affix_check_morph(const char* word, 209 int len, 210 const FLAG needflag = FLAG_NULL, 211 char in_compound = IN_CPD_NOT); 212 std::string prefix_check_morph(const char* word, 213 int len, 214 char in_compound, 215 const FLAG needflag = FLAG_NULL); 216 std::string suffix_check_morph(const char* word, 217 int len, 218 int sfxopts, 219 PfxEntry* ppfx, 220 const FLAG cclass = FLAG_NULL, 221 const FLAG needflag = FLAG_NULL, 222 char in_compound = IN_CPD_NOT); 223 224 std::string prefix_check_twosfx_morph(const char* word, 225 int len, 226 char in_compound, 227 const FLAG needflag = FLAG_NULL); 228 std::string suffix_check_twosfx_morph(const char* word, 229 int len, 230 int sfxopts, 231 PfxEntry* ppfx, 232 const FLAG needflag = FLAG_NULL); 233 234 std::string morphgen(const char* ts, 235 int wl, 236 const unsigned short* ap, 237 unsigned short al, 238 const char* morph, 239 const char* targetmorph, 240 int level); 241 242 int expand_rootword(struct guessword* wlst, 243 int maxn, 244 const char* ts, 245 int wl, 246 const unsigned short* ap, 247 unsigned short al, 248 const char* bad, 249 int, 250 const char*); 251 252 short get_syllable(const std::string& word); 253 int cpdrep_check(const char* word, int len); 254 int cpdpat_check(const char* word, 255 int len, 256 hentry* r1, 257 hentry* r2, 258 const char affixed); 259 int defcpd_check(hentry*** words, 260 short wnum, 261 hentry* rv, 262 hentry** rwords, 263 char all); 264 int cpdcase_check(const char* word, int len); 265 inline int candidate_check(const char* word, int len); 266 void setcminmax(int* cmin, int* cmax, const char* word, int len); 267 struct hentry* compound_check(const std::string& word, 268 short wordnum, 269 short numsyllable, 270 short maxwordnum, 271 short wnum, 272 hentry** words, 273 hentry** rwords, 274 char hu_mov_rule, 275 char is_sug, 276 int* info); 277 278 int compound_check_morph(const char* word, 279 int len, 280 short wordnum, 281 short numsyllable, 282 short maxwordnum, 283 short wnum, 284 hentry** words, 285 hentry** rwords, 286 char hu_mov_rule, 287 std::string& result, 288 const std::string* partresult); 289 290 std::vector<std::string> get_suffix_words(short unsigned* suff, 291 int len, 292 const char* root_word); 293 294 struct hentry* lookup(const char* word); 295 const std::vector<replentry>& get_reptable() const; 296 RepList* get_iconvtable() const; 297 RepList* get_oconvtable() const; 298 struct phonetable* get_phonetable() const; 299 const std::vector<mapentry>& get_maptable() const; 300 const std::vector<std::string>& get_breaktable() const; 301 const std::string& get_encoding(); 302 int get_langnum() const; 303 char* get_key_string(); 304 char* get_try_string() const; 305 const std::string& get_wordchars() const; 306 const std::vector<w_char>& get_wordchars_utf16() const; 307 const char* get_ignore() const; 308 const std::vector<w_char>& get_ignore_utf16() const; 309 int get_compound() const; 310 FLAG get_compoundflag() const; 311 FLAG get_forbiddenword() const; 312 FLAG get_nosuggest() const; 313 FLAG get_nongramsuggest() const; 314 FLAG get_needaffix() const; 315 FLAG get_onlyincompound() const; 316 const char* get_derived() const; 317 const std::string& get_version() const; 318 int have_contclass() const; 319 int get_utf8() const; 320 int get_complexprefixes() const; 321 char* get_suffixed(char) const; 322 int get_maxngramsugs() const; 323 int get_maxcpdsugs() const; 324 int get_maxdiff() const; 325 int get_onlymaxdiff() const; 326 int get_nosplitsugs() const; 327 int get_sugswithdots(void) const; 328 FLAG get_keepcase(void) const; 329 FLAG get_forceucase(void) const; 330 FLAG get_warn(void) const; 331 int get_forbidwarn(void) const; 332 int get_checksharps(void) const; 333 char* encode_flag(unsigned short aflag) const; 334 int get_fullstrip() const; 335 336 private: 337 int parse_file(const char* affpath, const char* key); 338 bool parse_flag(const std::string& line, unsigned short* out, FileMgr* af); 339 bool parse_num(const std::string& line, int* out, FileMgr* af); 340 bool parse_cpdsyllable(const std::string& line, FileMgr* af); 341 bool parse_reptable(const std::string& line, FileMgr* af); 342 bool parse_convtable(const std::string& line, 343 FileMgr* af, 344 RepList** rl, 345 const std::string& keyword); 346 bool parse_phonetable(const std::string& line, FileMgr* af); 347 bool parse_maptable(const std::string& line, FileMgr* af); 348 bool parse_breaktable(const std::string& line, FileMgr* af); 349 bool parse_checkcpdtable(const std::string& line, FileMgr* af); 350 bool parse_defcpdtable(const std::string& line, FileMgr* af); 351 bool parse_affix(const std::string& line, const char at, FileMgr* af, char* dupflags); 352 353 void reverse_condition(std::string&); 354 std::string& debugflag(std::string& result, unsigned short flag); 355 int condlen(const char*); 356 int encodeit(AffEntry& entry, const char* cs); 357 int build_pfxtree(PfxEntry* pfxptr); 358 int build_sfxtree(SfxEntry* sfxptr); 359 int process_pfx_order(); 360 int process_sfx_order(); 361 PfxEntry* process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr); 362 SfxEntry* process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr); 363 int process_pfx_tree_to_list(); 364 int process_sfx_tree_to_list(); 365 int redundant_condition(char, const char* strip, int stripl, const char* cond, int); 366 void finishFileMgr(FileMgr* afflst); 367 }; 368 369 #endif 370