1 #ifndef _AFFIXMGR_HXX_ 2 #define _AFFIXMGR_HXX_ 3 #include <cstdlib> 4 #include <cstring> 5 #include <cstdio> 6 7 #include "atypes.hxx" 8 #include "baseaffi.hxx" 9 #include "hashmgr.hxx" 10 11 // check flag duplication 12 #define dupSFX (1 << 0) 13 #define dupPFX (1 << 1) 14 15 class AffixMgr 16 { 17 18 AffEntry * pStart[SETSIZE]; 19 AffEntry * sStart[SETSIZE]; 20 AffEntry * pFlag[CONTSIZE]; 21 AffEntry * sFlag[CONTSIZE]; 22 HashMgr * pHMgr; 23 char * trystring; 24 char * encoding; 25 struct cs_info * csconv; 26 int utf8; 27 struct unicode_info2 * utf_tbl; 28 int complexprefixes; 29 FLAG compoundflag; 30 FLAG compoundbegin; 31 FLAG compoundmiddle; 32 FLAG compoundend; 33 FLAG compoundroot; 34 FLAG compoundforbidflag; 35 FLAG compoundpermitflag; 36 int checkcompounddup; 37 int checkcompoundrep; 38 int checkcompoundcase; 39 int checkcompoundtriple; 40 FLAG forbiddenword; 41 FLAG nosuggest; 42 FLAG pseudoroot; 43 int cpdmin; 44 int numrep; 45 replentry * reptable; 46 int nummap; 47 mapentry * maptable; 48 int numbreak; 49 char ** breaktable; 50 int numcheckcpd; 51 replentry * checkcpdtable; 52 int numdefcpd; 53 flagentry * defcpdtable; 54 int maxngramsugs; 55 int nosplitsugs; 56 int sugswithdots; 57 int cpdwordmax; 58 int cpdmaxsyllable; 59 char * cpdvowels; 60 w_char * cpdvowels_utf16; 61 int cpdvowels_utf16_len; 62 char * cpdsyllablenum; 63 const char * pfxappnd; // BUG: not stateless 64 const char * sfxappnd; // BUG: not stateless 65 FLAG sfxflag; // BUG: not stateless 66 char * derived; // BUG: not stateless 67 AffEntry * sfx; // BUG: not stateless 68 AffEntry * pfx; // BUG: not stateless 69 int checknum; 70 char * wordchars; 71 unsigned short * wordchars_utf16; 72 int wordchars_utf16_len; 73 char * version; 74 char * lang; 75 int langnum; 76 FLAG lemma_present; 77 FLAG circumfix; 78 FLAG onlyincompound; 79 FLAG keepcase; 80 int checksharps; 81 82 int havecontclass; // boolean variable 83 char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix) 84 flag flag_mode; 85 86 public: 87 88 AffixMgr(const char * affpath, HashMgr * ptr); 89 ~AffixMgr(); 90 struct hentry * affix_check(const char * word, int len, 91 const unsigned short needflag = (unsigned short) 0, char in_compound = IN_CPD_NOT); 92 struct hentry * prefix_check(const char * word, int len, 93 char in_compound, const FLAG needflag = FLAG_NULL); 94 inline int isSubset(const char * s1, const char * s2); 95 struct hentry * prefix_check_twosfx(const char * word, int len, 96 char in_compound, const FLAG needflag = FLAG_NULL); 97 inline int isRevSubset(const char * s1, const char * end_of_s2, int len); 98 struct hentry * suffix_check(const char * word, int len, int sfxopts, AffEntry* ppfx, 99 char ** wlst, int maxSug, int * ns, const FLAG cclass = FLAG_NULL, 100 const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); 101 struct hentry * suffix_check_twosfx(const char * word, int len, 102 int sfxopts, AffEntry* ppfx, const FLAG needflag = FLAG_NULL); 103 104 char * affix_check_morph(const char * word, int len, 105 const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); 106 char * prefix_check_morph(const char * word, int len, 107 char in_compound, const FLAG needflag = FLAG_NULL); 108 char * suffix_check_morph (const char * word, int len, int sfxopts, AffEntry * ppfx, 109 const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT); 110 111 char * prefix_check_twosfx_morph(const char * word, int len, 112 char in_compound, const FLAG needflag = FLAG_NULL); 113 char * suffix_check_twosfx_morph(const char * word, int len, 114 int sfxopts, AffEntry * ppfx, const FLAG needflag = FLAG_NULL); 115 116 int expand_rootword(struct guessword * wlst, int maxn, const char * ts, 117 int wl, const unsigned short * ap, unsigned short al, char * bad, int); 118 119 int get_syllable (const char * word, int wlen); 120 int cpdrep_check(const char * word, int len); 121 int cpdpat_check(const char * word, int len); 122 int defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** rwords, char all); 123 int cpdcase_check(const char * word, int len); 124 int candidate_check(const char * word, int len); 125 struct hentry * compound_check(const char * word, int len, 126 short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words, 127 char hu_mov_rule, int * cmpdstemnum, int * cmpdstem, char is_sug); 128 129 int compound_check_morph(const char * word, int len, 130 short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words, 131 char hu_mov_rule, char ** result, char * partresult); 132 133 struct hentry * lookup(const char * word); 134 int get_numrep(); 135 struct replentry * get_reptable(); 136 int get_nummap(); 137 struct mapentry * get_maptable(); 138 int get_numbreak(); 139 char ** get_breaktable(); 140 char * get_encoding(); 141 int get_langnum(); 142 struct unicode_info2 * get_utf_conv(); 143 char * get_try_string(); 144 const char * get_wordchars(); 145 unsigned short * get_wordchars_utf16(int * len); 146 int get_compound(); 147 FLAG get_compoundflag(); 148 FLAG get_compoundbegin(); 149 FLAG get_forbiddenword(); 150 FLAG get_nosuggest(); 151 FLAG get_pseudoroot(); 152 FLAG get_onlyincompound(); 153 FLAG get_compoundroot(); 154 FLAG get_lemma_present(); 155 int get_checknum(); 156 char * get_possible_root(); 157 const char * get_prefix(); 158 const char * get_suffix(); 159 const char * get_derived(); 160 const char * get_version(); 161 const int have_contclass(); 162 int get_utf8(); 163 int get_complexprefixes(); 164 char * get_suffixed(char ); 165 int get_maxngramsugs(); 166 int get_nosplitsugs(); 167 int get_sugswithdots(void); 168 FLAG get_keepcase(void); 169 int get_checksharps(void); 170 171 private: 172 int parse_file(const char * affpath); 173 int parse_try(char * line); 174 int parse_set(char * line); 175 int parse_flag(char * line, unsigned short * out, char * name); 176 int parse_num(char * line, int * out, char * name); 177 int parse_cpdflag(char * line); 178 int parse_cpdforbid(char * line); 179 int parse_forbid(char * line); 180 int parse_cpdsyllable(char * line); 181 int parse_syllablenum(char * line); 182 int parse_reptable(char * line, FILE * af); 183 int parse_maptable(char * line, FILE * af); 184 int parse_breaktable(char * line, FILE * af); 185 int parse_checkcpdtable(char * line, FILE * af); 186 int parse_defcpdtable(char * line, FILE * af); 187 int parse_affix(char * line, const char at, FILE * af, char * dupflags); 188 int parse_wordchars(char * line); 189 int parse_lang(char * line); 190 int parse_version(char * line); 191 192 int encodeit(struct affentry * ptr, char * cs); 193 int build_pfxtree(AffEntry* pfxptr); 194 int build_sfxtree(AffEntry* sfxptr); 195 int process_pfx_order(); 196 int process_sfx_order(); 197 AffEntry * process_pfx_in_order(AffEntry * ptr, AffEntry * nptr); 198 AffEntry * process_sfx_in_order(AffEntry * ptr, AffEntry * nptr); 199 int process_pfx_tree_to_list(); 200 int process_sfx_tree_to_list(); 201 void set_spec_utf8_encoding(); 202 int redundant_condition(char, char * strip, int stripl, const char * cond, char *); 203 }; 204 205 #endif 206 207