1 /* Copyright (c) 2013 - The libcangjie authors.
2 *
3 * This file is part of libcangjie.
4 *
5 * libcangjie is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * libcangjie is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with libcangjie. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19
20 #include <stdlib.h>
21 #include <string.h>
22
23 #include "cangjie.h"
24
25
// Common head of every lookup statement: selects the character, its code and
// its frequency for one version (the %d placeholder). The text ends with a
// space so that additional "AND ..." clauses can be appended directly.
#define BASE_QUERY \
    "SELECT chchar, code, frequency\n" \
    "FROM chars\n" \
    "INNER JOIN codes on chars.char_index=codes.char_index\n" \
    "WHERE version=%d "

// Size budget for the filter clause. The longest clause is produced when
// every filter is enabled and is 127 characters long:
// " AND ( big5 = 1 OR hkscs = 1 OR punct = 1 OR ... ) "
#define MAX_LEN_FILTER_QUERY 127

// Size budget for the code clause, measured on the format string itself
// (before %q is expanded). The longest one is 19 characters long:
// "AND code GLOB '%q';"
#define MAX_LEN_CODE_QUERY 19
38
39
// Radical shown for each input key, indexed by (key - 'a').
// Every entry is a UTF-8 byte sequence written out with hex escapes; the
// trailing comment shows the character it encodes.
const char *cangjie_radicals[] = {
    /* a */ "\xE6\x97\xA5", // 日
    /* b */ "\xE6\x9C\x88", // 月
    /* c */ "\xE9\x87\x91", // 金
    /* d */ "\xE6\x9C\xA8", // 木
    /* e */ "\xE6\xB0\xB4", // 水
    /* f */ "\xE7\x81\xAB", // 火
    /* g */ "\xE5\x9C\x9F", // 土
    /* h */ "\xE7\xAB\xB9", // 竹
    /* i */ "\xE6\x88\x88", // 戈
    /* j */ "\xE5\x8D\x81", // 十
    /* k */ "\xE5\xA4\xA7", // 大
    /* l */ "\xE4\xB8\xAD", // 中
    /* m */ "\xE4\xB8\x80", // 一
    /* n */ "\xE5\xBC\x93", // 弓
    /* o */ "\xE4\xBA\xBA", // 人
    /* p */ "\xE5\xBF\x83", // 心
    /* q */ "\xE6\x89\x8B", // 手
    /* r */ "\xE5\x8F\xA3", // 口
    /* s */ "\xE5\xB0\xB8", // 尸
    /* t */ "\xE5\xBB\xBF", // 廿
    /* u */ "\xE5\xB1\xB1", // 山
    /* v */ "\xE5\xA5\xB3", // 女
    /* w */ "\xE7\x94\xB0", // 田
    /* x */ "\xE9\x9B\xA3", // 難
    /* y */ "\xE5\x8D\x9C", // 卜
    /* z */ "\xEF\xBC\xBA", // Ｚ (fullwidth Latin Z, U+FF3A)
};
68
// Append the "OR " separator to query, except in front of the first
// condition: while *first is non-zero nothing is appended and the flag is
// cleared instead, so every later call emits the separator.
static void strcat_or_operator(uint32_t *first, char *query) {
    if (*first) {
        *first = 0;
        return;
    }

    strcat(query, "OR ");
}
76
// Build the SQL fragment that restricts results to the character classes
// selected in cj->filter_flags.
//
// On success *query receives a newly allocated string that the caller must
// release with free(): an empty string when no filter flag is set, otherwise
// " AND ( <column> = 1 OR <column> = 1 ... ) ".
//
// Returns CANGJIE_OK on success, or CANGJIE_NOMEM when the allocation fails.
int cangjie_get_filter_query(Cangjie *cj, char **query) {
    uint32_t first = 1;

    if (cj->filter_flags == 0) {
        // No filter means pass all, so let's return an empty string
        *query = calloc(1, sizeof(char));
        // The allocation result lives in *query; testing the out-parameter
        // itself would never detect a failed calloc.
        if (*query == NULL) {
            return CANGJIE_NOMEM;
        }

        return CANGJIE_OK;
    }

    // Sized for the worst case, i.e. every filter flag enabled
    *query = calloc(MAX_LEN_FILTER_QUERY + 1, sizeof(char));
    if (*query == NULL) {
        return CANGJIE_NOMEM;
    }

    strcat(*query, " AND ( ");

    // Each enabled class contributes one condition; strcat_or_operator()
    // inserts "OR " in front of every condition but the first one.
    if (cj->filter_flags & CANGJIE_FILTER_BIG5) {
        strcat_or_operator(&first, *query);
        strcat(*query, "big5 = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_HKSCS) {
        strcat_or_operator(&first, *query);
        strcat(*query, "hkscs = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_PUNCTUATION) {
        strcat_or_operator(&first, *query);
        strcat(*query, "punct = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_CHINESE) {
        strcat_or_operator(&first, *query);
        strcat(*query, "zh = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_ZHUYIN) {
        strcat_or_operator(&first, *query);
        strcat(*query, "zhuyin = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_KANJI) {
        strcat_or_operator(&first, *query);
        strcat(*query, "kanji = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_KATAKANA) {
        strcat_or_operator(&first, *query);
        strcat(*query, "katakana = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_HIRAGANA) {
        strcat_or_operator(&first, *query);
        strcat(*query, "hiragana = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_SYMBOLS) {
        strcat_or_operator(&first, *query);
        strcat(*query, "symbol = 1 ");
    }

    strcat(*query, ") ");

    return CANGJIE_OK;
}
145
// Create a Cangjie instance for the given version and filter flags.
//
// Prepares the two query templates kept on the instance, then opens the
// database read-only. The database path is taken from the CANGJIE_DB
// environment variable when it is set, and from the CANGJIE_DB constant
// otherwise. *cj is only written on success.
//
// Returns CANGJIE_OK on success, CANGJIE_NOMEM when an allocation fails, the
// error reported by cangjie_get_filter_query(), CANGJIE_DBOPEN when SQLite
// reports SQLITE_CANTOPEN, or the raw SQLite code for any other open failure.
int cangjie_new(Cangjie **cj,
                CangjieVersion version,
                CangjieFilter filter_flags) {
    Cangjie *self;
    char *filter_clause;
    const char *db_path;
    int status;

    self = calloc(1, sizeof(Cangjie));
    if (self == NULL) {
        return CANGJIE_NOMEM;
    }

    self->version = version;
    self->filter_flags = filter_flags;

    // Main query template: base query followed by the filter clause
    self->cj_query = calloc(strlen(BASE_QUERY) + MAX_LEN_FILTER_QUERY + 1,
                            sizeof(char));
    if (self->cj_query == NULL) {
        cangjie_free(self);
        return CANGJIE_NOMEM;
    }
    strcpy(self->cj_query, BASE_QUERY);

    status = cangjie_get_filter_query(self, &filter_clause);
    if (status != CANGJIE_OK) {
        cangjie_free(self);
        return status;
    }
    strcat(self->cj_query, filter_clause);
    free(filter_clause);

    // Short code query template: base query followed by an exact code match
    self->shortcode_query = calloc(strlen(BASE_QUERY) + MAX_LEN_CODE_QUERY + 1,
                                   sizeof(char));
    if (self->shortcode_query == NULL) {
        cangjie_free(self);
        return CANGJIE_NOMEM;
    }
    strcpy(self->shortcode_query, BASE_QUERY);
    strcat(self->shortcode_query, "AND code = '%q';");

    // The CANGJIE_DB env var overrides the default path (useful for local
    // testing)
    db_path = getenv("CANGJIE_DB");
    if (db_path == NULL) {
        db_path = CANGJIE_DB;
    }

    status = sqlite3_open_v2(db_path, &self->db, SQLITE_OPEN_READONLY, NULL);
    if (status != SQLITE_OK) {
        cangjie_free(self);
        // FIXME: Unhandled error codes (anything but SQLITE_CANTOPEN is
        // handed back to the caller as the raw SQLite value)
        return (status == SQLITE_CANTOPEN) ? CANGJIE_DBOPEN : status;
    }

    *cj = self;

    return CANGJIE_OK;
}
210
// Look up the characters matching a Cangjie input code.
//
// input_code must be 1 to 5 characters long. It may contain the '*'
// wildcard, but neither as its first nor as its last character; when a
// wildcard is present the code is matched with GLOB, otherwise with '='.
//
// On success *l receives the list of matching characters (rows are
// prepended, so the list is in reverse row order). *l is left untouched on
// any failure.
//
// Returns CANGJIE_OK on success, CANGJIE_INVALID for a rejected input code,
// CANGJIE_NOMEM when an allocation fails, CANGJIE_NOCHARS when no row
// matches, CANGJIE_DBERROR when stepping through the results fails, the
// error reported by cangjie_char_new() / cangjie_char_list_prepend(), or the
// raw SQLite code when the statement cannot be prepared.
int cangjie_get_characters(Cangjie *cj,
                           char *input_code,
                           CangjieCharList **l) {
    CangjieCharList *tmp = NULL;
    sqlite3_stmt *stmt = NULL;
    const char *code_clause;
    char *cj_query;
    char *query;
    size_t input_len;
    int ret;

    if (input_code == NULL) {
        return CANGJIE_INVALID;
    }

    input_len = strlen(input_code);
    if (input_len == 0 || input_len > 5) {
        return CANGJIE_INVALID;
    }

    if (input_code[0] == '*' || input_code[input_len - 1] == '*') {
        return CANGJIE_INVALID;
    }

    // Handle optional wildcards
    if (strchr(input_code, '*') == NULL) {
        code_clause = "AND code = '%q';";
    } else {
        code_clause = "AND code GLOB '%q';";
    }

    // Start with the Cangjie instance's cj_query and append the code clause
    cj_query = calloc(strlen(cj->cj_query) + MAX_LEN_CODE_QUERY + 1,
                      sizeof(char));
    if (cj_query == NULL) {
        return CANGJIE_NOMEM;
    }

    strcpy(cj_query, cj->cj_query);
    strcat(cj_query, code_clause);

    // input_code was validated above, so it can be formatted directly
    // without going through an intermediate copy.
    query = sqlite3_mprintf(cj_query, cj->version, input_code);
    free(cj_query);

    if (query == NULL) {
        return CANGJIE_NOMEM;
    }

    ret = sqlite3_prepare_v2(cj->db, query, -1, &stmt, 0);

    // The SQL text is no longer needed whether or not the prepare succeeded;
    // releasing it before the check avoids leaking it on failure.
    sqlite3_free(query);

    if (ret != SQLITE_OK) {
        // FIXME: Unhandled error codes
        return ret;
    }

    // NOTE(review): on the error returns below, characters already collected
    // in tmp (and a CangjieChar that failed to be prepended) are not
    // released; no destructor for them is visible in this file -- confirm
    // and free them here.
    while ((ret = sqlite3_step(stmt)) == SQLITE_ROW) {
        char *chchar = (char *)sqlite3_column_text(stmt, 0);
        char *code = (char *)sqlite3_column_text(stmt, 1);
        uint32_t frequency = (uint32_t)sqlite3_column_int(stmt, 2);
        CangjieChar *c;
        int rc;

        rc = cangjie_char_new(&c, chchar, code, frequency);
        if (rc != CANGJIE_OK) {
            sqlite3_finalize(stmt);
            return rc;
        }

        rc = cangjie_char_list_prepend(&tmp, c);
        if (rc != CANGJIE_OK) {
            sqlite3_finalize(stmt);
            return rc;
        }
    }

    // The statement is released on every path, not only after SQLITE_DONE
    sqlite3_finalize(stmt);

    if (ret != SQLITE_DONE) {
        // Some error encountered
        return CANGJIE_DBERROR;
    }

    if (tmp == NULL) {
        return CANGJIE_NOCHARS;
    }

    *l = tmp;

    return CANGJIE_OK;
}
307
// Look up the characters associated with a one-character short code.
//
// input_code must be exactly one character long. The query is always
// formatted with version 0 -- presumably short codes are stored under that
// version in the database; TODO confirm. Each resulting character is
// created with input_code as its code.
//
// On success *l receives the list of matching characters (rows are
// prepended, so the list is in reverse row order). *l is left untouched on
// any failure.
//
// Returns CANGJIE_OK on success, CANGJIE_INVALID for a rejected input code,
// CANGJIE_NOMEM when the query cannot be allocated, CANGJIE_NOCHARS when no
// row matches, CANGJIE_DBERROR when stepping through the results fails, the
// error reported by cangjie_char_new() / cangjie_char_list_prepend(), or the
// raw SQLite code when the statement cannot be prepared.
int cangjie_get_characters_by_shortcode(Cangjie *cj,
                                        char *input_code,
                                        CangjieCharList **l) {
    CangjieCharList *tmp = NULL;
    sqlite3_stmt *stmt = NULL;
    char *query;
    int ret;

    if (input_code == NULL || strlen(input_code) != 1) {
        return CANGJIE_INVALID;
    }

    query = sqlite3_mprintf(cj->shortcode_query, 0, input_code);
    if (query == NULL) {
        return CANGJIE_NOMEM;
    }

    ret = sqlite3_prepare_v2(cj->db, query, -1, &stmt, 0);

    // The SQL text is no longer needed whether or not the prepare succeeded;
    // releasing it before the check avoids leaking it on failure.
    sqlite3_free(query);

    if (ret != SQLITE_OK) {
        // FIXME: Unhandled error codes
        return ret;
    }

    // NOTE(review): on the error returns below, characters already collected
    // in tmp (and a CangjieChar that failed to be prepended) are not
    // released; no destructor for them is visible in this file -- confirm
    // and free them here.
    while ((ret = sqlite3_step(stmt)) == SQLITE_ROW) {
        char *chchar = (char *)sqlite3_column_text(stmt, 0);
        uint32_t frequency = (uint32_t)sqlite3_column_int(stmt, 2);
        CangjieChar *c;
        int rc;

        rc = cangjie_char_new(&c, chchar, input_code, frequency);
        if (rc != CANGJIE_OK) {
            sqlite3_finalize(stmt);
            return rc;
        }

        rc = cangjie_char_list_prepend(&tmp, c);
        if (rc != CANGJIE_OK) {
            sqlite3_finalize(stmt);
            return rc;
        }
    }

    // The statement is released on every path, not only after SQLITE_DONE
    sqlite3_finalize(stmt);

    if (ret != SQLITE_DONE) {
        // Some error encountered
        return CANGJIE_DBERROR;
    }

    if (tmp == NULL) {
        return CANGJIE_NOCHARS;
    }

    *l = tmp;

    return CANGJIE_OK;
}
369
// Translate an input key into the radical to display for it.
//
// The wildcard '*' maps to the string "*"; a key in 'a'..'z' maps to the
// matching entry of cangjie_radicals. *radical points at static data and is
// only written when the key is accepted. cj is not used.
//
// Returns CANGJIE_OK on success, CANGJIE_INVALID for any other key.
int cangjie_get_radical(Cangjie *cj,
                        const char key,
                        char **radical) {
    // Special case for the wildcard '*'
    if (key == '*') {
        *radical = "*";
        return CANGJIE_OK;
    }

    // The actual Cangjie radicals
    if (key >= 'a' && key <= 'z') {
        *radical = (char *)cangjie_radicals[key - 'a'];
        return CANGJIE_OK;
    }

    return CANGJIE_INVALID;
}
387
// Tell whether key is a Cangjie input key, i.e. a letter in 'a'..'z'.
// cj is not used.
//
// Returns CANGJIE_OK when it is, CANGJIE_INVALID otherwise.
int cangjie_is_input_key(Cangjie *cj,
                         const char key) {
    return (key >= 'a' && key <= 'z') ? CANGJIE_OK : CANGJIE_INVALID;
}
396
// Release a Cangjie instance: closes its database handle, then frees both
// query templates and the instance itself.
//
// Passing NULL is allowed and does nothing, mirroring free(NULL). A partially
// initialised instance is fine as well, as long as its unset members are
// NULL.
//
// Always returns CANGJIE_OK.
int cangjie_free(Cangjie *cj) {
    if (cj == NULL) {
        return CANGJIE_OK;
    }

    sqlite3_close(cj->db);
    free(cj->cj_query);
    free(cj->shortcode_query);
    free(cj);

    return CANGJIE_OK;
}
405