1 #ifndef _AFFIXMGR_HXX_
2 #define _AFFIXMGR_HXX_
3 #include <cstdlib>
4 #include <cstring>
5 #include <cstdio>
6 
7 #include "atypes.hxx"
8 #include "baseaffi.hxx"
9 #include "hashmgr.hxx"
10 
11 // Bit masks used when checking for flag duplication (distinct bits, so both can be set in one value).
12 #define dupSFX        (1 << 0) // bit 0 (value 1); presumably marks a flag already seen on an SFX (suffix) entry - confirm at use site
13 #define dupPFX        (1 << 1) // bit 1 (value 2); presumably marks a flag already seen on a PFX (prefix) entry - confirm at use site
14 
15 class AffixMgr // Declaration only: no member function is defined in this header.
16 {
17   // Data members below precede "public:" and are therefore private (class default access).
18   AffEntry *          pStart[SETSIZE]; // presumably prefix entry lists bucketed by a key in [0, SETSIZE) - TODO confirm
19   AffEntry *          sStart[SETSIZE]; // suffix counterpart of pStart (assumed from naming) - TODO confirm
20   AffEntry *          pFlag[CONTSIZE]; // presumably prefix entries bucketed by affix flag - TODO confirm
21   AffEntry *          sFlag[CONTSIZE]; // suffix counterpart of pFlag (assumed from naming) - TODO confirm
22   HashMgr *           pHMgr; // presumably the HashMgr handed to the constructor - verify in implementation
23   char *              trystring; // returned by get_try_string() (assumed) - ownership not visible here
24   char *              encoding;
25   struct cs_info *    csconv;
26   int                 utf8;
27   struct unicode_info2 * utf_tbl;
28   int                 complexprefixes;
29   FLAG                compoundflag;
30   FLAG                compoundbegin;
31   FLAG                compoundmiddle;
32   FLAG                compoundend;
33   FLAG                compoundroot;
34   FLAG                compoundforbidflag;
35   FLAG                compoundpermitflag;
36   int                 checkcompounddup;
37   int                 checkcompoundrep;
38   int                 checkcompoundcase;
39   int                 checkcompoundtriple;
40   FLAG                forbiddenword;
41   FLAG                nosuggest;
42   FLAG                pseudoroot;
43   int                 cpdmin;
44   int                 numrep; // count/table pairs follow: num* is presumably the element count of the pointer declared after it - confirm
45   replentry *         reptable;
46   int                 nummap;
47   mapentry *          maptable;
48   int                 numbreak;
49   char **             breaktable;
50   int                 numcheckcpd;
51   replentry *         checkcpdtable;
52   int                 numdefcpd;
53   flagentry *         defcpdtable;
54   int                 maxngramsugs;
55   int                 nosplitsugs;
56   int                 sugswithdots;
57   int                 cpdwordmax;
58   int                 cpdmaxsyllable;
59   char *              cpdvowels;
60   w_char *            cpdvowels_utf16;
61   int                 cpdvowels_utf16_len;
62   char *              cpdsyllablenum;
63   const char *        pfxappnd; // BUG: not stateless
64   const char *        sfxappnd; // BUG: not stateless
65   FLAG                sfxflag;  // BUG: not stateless
66   char *              derived;  // BUG: not stateless
67   AffEntry *          sfx;      // BUG: not stateless
68   AffEntry *          pfx;      // BUG: not stateless
69   int                 checknum; // NOTE(review): the six members above are flagged as per-call state kept in the object; presumably unsafe for concurrent use - confirm
70   char *              wordchars;
71   unsigned short *    wordchars_utf16;
72   int                 wordchars_utf16_len; // presumably the element count of wordchars_utf16 - confirm
73   char *              version;
74   char *              lang;
75   int                 langnum;
76   FLAG                lemma_present;
77   FLAG                circumfix;
78   FLAG                onlyincompound;
79   FLAG                keepcase;
80   int                 checksharps;
81 
82   int                 havecontclass; // boolean variable
83   char                contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
84   flag                flag_mode; // NOTE(review): type is `flag` (lowercase), a different identifier from FLAG; declared outside this header
85 
86 public:
87   // Construction / destruction.
88   AffixMgr(const char * affpath, HashMgr * ptr);
89   ~AffixMgr();
90   struct hentry *     affix_check(const char * word, int len,
91             const unsigned short needflag = (unsigned short) 0, char in_compound = IN_CPD_NOT); // NOTE(review): needflag is spelled `unsigned short` here but FLAG / FLAG_NULL in the sibling checks - presumably the same type; confirm
92   struct hentry *     prefix_check(const char * word, int len,
93             char in_compound, const FLAG needflag = FLAG_NULL);
94   inline int isSubset(const char * s1, const char * s2); // NOTE(review): declared inline but no definition is visible in this header - confirm every user can see the definition
95   struct hentry *     prefix_check_twosfx(const char * word, int len,
96             char in_compound, const FLAG needflag = FLAG_NULL);
97   inline int isRevSubset(const char * s1, const char * end_of_s2, int len); // NOTE(review): same inline-without-visible-definition concern as isSubset
98   struct hentry *     suffix_check(const char * word, int len, int sfxopts, AffEntry* ppfx,
99 			char ** wlst, int maxSug, int * ns, const FLAG cclass = FLAG_NULL,
100                         const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
101   struct hentry *     suffix_check_twosfx(const char * word, int len,
102             int sfxopts, AffEntry* ppfx, const FLAG needflag = FLAG_NULL);
103   // The *_morph declarations below return char * rather than struct hentry *; ownership of the result is not visible here.
104   char * affix_check_morph(const char * word, int len,
105                     const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
106   char * prefix_check_morph(const char * word, int len,
107                     char in_compound, const FLAG needflag = FLAG_NULL);
108   char * suffix_check_morph (const char * word, int len, int sfxopts, AffEntry * ppfx,
109             const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
110 
111   char * prefix_check_twosfx_morph(const char * word, int len,
112             char in_compound, const FLAG needflag = FLAG_NULL);
113   char * suffix_check_twosfx_morph(const char * word, int len,
114             int sfxopts, AffEntry * ppfx, const FLAG needflag = FLAG_NULL);
115 
116   int                 expand_rootword(struct guessword * wlst, int maxn, const char * ts,
117                         int wl, const unsigned short * ap, unsigned short al, char * bad, int); // final int parameter is unnamed here; its meaning is set by the implementation
118   // Compound-related checks (grouping inferred from the cpd*/compound* names).
119   int                 get_syllable (const char * word, int wlen);
120   int                 cpdrep_check(const char * word, int len);
121   int                 cpdpat_check(const char * word, int len);
122   int                 defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** rwords, char all);
123   int                 cpdcase_check(const char * word, int len);
124   int                 candidate_check(const char * word, int len);
125   struct hentry *     compound_check(const char * word, int len,
126                               short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
127                               char hu_mov_rule, int * cmpdstemnum, int * cmpdstem, char is_sug);
128 
129   int compound_check_morph(const char * word, int len,
130                               short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
131                               char hu_mov_rule, char ** result, char * partresult);
132   // Lookup and accessors; most get_* names mirror a private member above (presumably returning it - confirm).
133   struct hentry *     lookup(const char * word);
134   int                 get_numrep();
135   struct replentry *  get_reptable();
136   int                 get_nummap();
137   struct mapentry *   get_maptable();
138   int                 get_numbreak();
139   char **             get_breaktable();
140   char *              get_encoding();
141   int                 get_langnum();
142   struct unicode_info2 * get_utf_conv();
143   char *              get_try_string();
144   const char *        get_wordchars();
145   unsigned short * get_wordchars_utf16(int * len); // len is a non-const pointer; presumably an out-parameter receiving the length - confirm
146   int                 get_compound();
147   FLAG                get_compoundflag();
148   FLAG                get_compoundbegin();
149   FLAG                get_forbiddenword();
150   FLAG                get_nosuggest();
151   FLAG                get_pseudoroot();
152   FLAG                get_onlyincompound();
153   FLAG                get_compoundroot();
154   FLAG                get_lemma_present();
155   int                 get_checknum();
156   char *              get_possible_root();
157   const char *        get_prefix();
158   const char *        get_suffix();
159   const char *        get_derived();
160   const char *        get_version();
161   const int           have_contclass(); // NOTE(review): const on a by-value int return has no effect for callers; kept because the definition is outside this header
162   int                 get_utf8();
163   int                 get_complexprefixes();
164   char *              get_suffixed(char ); // parameter is unnamed here; its meaning is set by the implementation
165   int                 get_maxngramsugs();
166   int                 get_nosplitsugs();
167   int                 get_sugswithdots(void);
168   FLAG                get_keepcase(void);
169   int                 get_checksharps(void);
170 
171 private: // helpers below are callable only from AffixMgr members (and friends, if any)
172   int  parse_file(const char * affpath); // takes the same kind of path argument as the constructor; presumably called from it - confirm
173   int  parse_try(char * line);
174   int  parse_set(char * line);
175   int  parse_flag(char * line, unsigned short * out, char * name); // out is a non-const pointer; presumably receives the parsed flag - confirm
176   int  parse_num(char * line, int * out, char * name); // out is a non-const pointer; presumably receives the parsed number - confirm
177   int  parse_cpdflag(char * line);
178   int  parse_cpdforbid(char * line);
179   int  parse_forbid(char * line);
180   int  parse_cpdsyllable(char * line);
181   int  parse_syllablenum(char * line);
182   int  parse_reptable(char * line, FILE * af); // the *table parsers also take the FILE *, presumably to read following lines - confirm
183   int  parse_maptable(char * line, FILE * af);
184   int  parse_breaktable(char * line, FILE * af);
185   int  parse_checkcpdtable(char * line, FILE * af);
186   int  parse_defcpdtable(char * line, FILE * af);
187   int  parse_affix(char * line, const char at, FILE * af, char * dupflags); // dupflags presumably holds the dupSFX / dupPFX bits defined in this header - confirm
188   int  parse_wordchars(char * line);
189   int  parse_lang(char * line);
190   int  parse_version(char * line);
191 
192   int encodeit(struct affentry * ptr, char * cs);
193   int build_pfxtree(AffEntry* pfxptr);
194   int build_sfxtree(AffEntry* sfxptr);
195   int process_pfx_order();
196   int process_sfx_order();
197   AffEntry * process_pfx_in_order(AffEntry * ptr, AffEntry * nptr);
198   AffEntry * process_sfx_in_order(AffEntry * ptr, AffEntry * nptr);
199   int process_pfx_tree_to_list();
200   int process_sfx_tree_to_list();
201   void set_spec_utf8_encoding();
202   int redundant_condition(char, char * strip, int stripl, const char * cond, char *); // first and last parameters are unnamed here; see the implementation for their meaning
203 };
204 
205 #endif
206 
207