1 #ifndef __CSUTILHXX__
2 #define __CSUTILHXX__
3 
4 #include "hunvisapi.h"
5 
6 // First some base level utility routines
7 
#include <stdio.h>
#include <string.h>
#include "w_char.hxx"
#include "htypes.hxx"
11 
12 #ifdef MOZILLA_CLIENT
13 #include "nscore.h" // for mozalloc headers
14 #endif
15 
// Capitalization type codes.
// NOTE(review): the meanings are inferred from the names (no capitals,
// initial capital, all capitals, mixed case, mixed case with an initial
// capital) - confirm against get_captype().
#define NOCAP      0
#define INITCAP    1
#define ALLCAP     2
#define HUHCAP     3
#define HUHINITCAP 4
22 
// Defaults: the character encoding name and the keyboard string
// (key rows separated by '|').
#define SPELL_ENCODING "ISO8859-1"
#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
26 
// Default morphological field tags: a two-character code followed by ':'.
#define MORPH_STEM      "st:"
#define MORPH_ALLOMORPH "al:"
#define MORPH_POS       "po:"
#define MORPH_DERI_PFX  "dp:"
#define MORPH_INFL_PFX  "ip:"
#define MORPH_TERM_PFX  "tp:"
#define MORPH_DERI_SFX  "ds:"
#define MORPH_INFL_SFX  "is:"
#define MORPH_TERM_SFX  "ts:"
#define MORPH_SURF_PFX  "sp:"
#define MORPH_FREQ      "fr:"
#define MORPH_PHON      "ph:"
#define MORPH_HYPH      "hy:"
#define MORPH_PART      "pa:"
#define MORPH_FLAG      "fl:"
#define MORPH_HENTRY    "_H:"
// Length of a field tag without its terminating NUL (every tag above has
// the same length). sizeof on the literal makes this a compile-time
// constant of type size_t instead of a strlen() call at each use.
#define MORPH_TAG_LEN   (sizeof(MORPH_STEM) - 1)
45 
// Separator characters: space, newline and vertical tab.
// NOTE(review): FLD / REC / ALT presumably stand for field, record and
// alternative - confirm against the users of these macros.
#define MSEP_FLD  ' '
#define MSEP_REC  '\n'
#define MSEP_ALT  '\v'
49 
// Default flag values.
// DEFAULTFLAGS and FORBIDDENWORD deliberately share the same value here.
#define DEFAULTFLAGS    65510
#define FORBIDDENWORD   65510
#define ONLYUPCASEFLAG  65511
54 
55 // fopen or optional _wfopen to fix long pathname problem of WIN32
56 LIBHUNSPELL_DLL_EXPORTED FILE * myfopen(const char * path, const char * mode);
57 
58 // convert UTF-16 characters to UTF-8
59 LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
60 
61 // convert UTF-8 characters to UTF-16
62 LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
63 
64 // sort 2-byte vector
65 LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
66 
67 // binary search in 2-byte vector
68 LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
69 
70 // remove end of line char(s)
71 LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
72 
73 // duplicate string
74 LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
75 
76 // strcat for limited length destination string
77 LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
78 
79 // duplicate reverse of string
80 LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
81 
82 // parse into tokens with char delimiter
83 LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
84 // parse into tokens with char delimiter
85 LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
86 
87 // parse into tokens with char delimiter
88 LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
89 
90 // append s to ends of every lines in text
91 LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
92 
93 // tokenize into lines with new line
94 LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
95 
96 // tokenize into lines with new line and uniq in place
97 LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
98 LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
99 
100 // change oldchar to newchar in place
101 LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
102 
103 // reverse word
104 LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
105 
106 // reverse word
107 LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
108 
109 // remove duplicates
110 LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
111 
112 // free character array list
113 LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
114 
// Character encoding information for a single 8-bit character.
// NOTE(review): field meanings are inferred from their names - clower and
// cupper presumably hold the lower- and uppercase forms, ccase presumably
// a case indicator; confirm against the encoding tables.
// The field order matters for positional (aggregate) initializers.
struct cs_info {
    unsigned char ccase;
    unsigned char clower;
    unsigned char cupper;
};
121 
122 LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
123 LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
124 LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
125 LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
126 LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
127 
128 LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
129 
130 // get language identifiers of language codes
131 LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
132 
133 // get characters of the given 8bit encoding with lower- and uppercase forms
134 LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
135 
136 // convert null terminated string to all caps using encoding
137 LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
138 
139 // convert null terminated string to all little using encoding
140 LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
141 
142 // convert null terminated string to have initial capital using encoding
143 LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
144 
145 // convert null terminated string to all caps
146 LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
147 
148 // convert null terminated string to all little
149 LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
150 
151 // convert null terminated string to have initial capital
152 LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
153 
154 // convert first nc characters of UTF-8 string to little
155 LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
156 
157 // convert first nc characters of UTF-8 string to capital
158 LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
159 
160 // get type of capitalization
161 LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
162 
163 // get type of capitalization (UTF-8)
164 LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
165 
166 // strip all ignored characters in the string
167 LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
168 
169 // strip all ignored characters in the string
170 LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
171 
172 LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
173 
174 LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
175     int * out_utf16_len, int utf8, int ln);
176 
177 LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
178 LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
179 
180 LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
181 
182 LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
183 
184 // conversion function for protected memory
185 LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
186 
187 // conversion function for protected memory
188 LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
189 
// hash entry accessors (inline functions)
HENTRY_DATA(struct hentry * h)191 LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
192 {
193     char *ret;
194     if (!h->var)
195         ret = NULL;
196     else if (h->var & H_OPT_ALIASM)
197         ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
198     else
199         ret = HENTRY_WORD(h) + h->blen + 1;
200     return ret;
201 }
202 
203 // NULL-free version for warning-free OOo build
HENTRY_DATA2(const struct hentry * h)204 LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
205 {
206     const char *ret;
207     if (!h->var)
208         ret = "";
209     else if (h->var & H_OPT_ALIASM)
210         ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
211     else
212         ret = HENTRY_WORD(h) + h->blen + 1;
213     return ret;
214 }
215 
HENTRY_FIND(struct hentry * h,const char * p)216 LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
217 {
218     return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
219 }
220 
// Non-zero when a and b have equal l and h members.
// Each argument may be evaluated twice, so pass expressions without side
// effects.
#define w_char_eq(a, b) (((a).l == (b).l) && ((a).h == (b).h))
222 
223 #endif
224