1 /* Copyright (c) 2013 - The libcangjie authors.
2  *
3  * This file is part of libcangjie.
4  *
5  * libcangjie is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcangjie is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcangjie.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include "cangjie.h"
24 
25 
// Common head of every lookup statement. It selects the character, its code
// and its frequency; the single %d placeholder is filled with a version
// number when the final statement is formatted. The trailing space lets
// further "AND ..." clauses be appended directly.
#define BASE_QUERY \
    "SELECT chchar, code, frequency\n" \
    "FROM chars\n" \
    "INNER JOIN codes on chars.char_index=codes.char_index\n" \
    "WHERE version=%d "

// Upper bound (terminating NUL not counted) on the filter clause, reached
// when every filter is enabled:
//     " AND ( big5 = 1 OR hkscs = 1 OR punct = 1 OR ... ) "
#define MAX_LEN_FILTER_QUERY 127

// Upper bound (terminating NUL not counted) on the code clause, i.e. the
// length of its longer, wildcard form:
//     "AND code GLOB '%q';"
#define MAX_LEN_CODE_QUERY 19
38 
39 
// UTF-8 encoded radical for each input key, indexed by (key - 'a').
// Each entry is spelled with an explicit index so the key <-> radical pairing
// is visible at a glance; the resulting array is the same 26 entries.
const char *cangjie_radicals[] = {
    ['a' - 'a'] = "\xE6\x97\xA5", // 日
    ['b' - 'a'] = "\xE6\x9C\x88", // 月
    ['c' - 'a'] = "\xE9\x87\x91", // 金
    ['d' - 'a'] = "\xE6\x9C\xA8", // 木
    ['e' - 'a'] = "\xE6\xB0\xB4", // 水
    ['f' - 'a'] = "\xE7\x81\xAB", // 火
    ['g' - 'a'] = "\xE5\x9C\x9F", // 土
    ['h' - 'a'] = "\xE7\xAB\xB9", // 竹
    ['i' - 'a'] = "\xE6\x88\x88", // 戈
    ['j' - 'a'] = "\xE5\x8D\x81", // 十
    ['k' - 'a'] = "\xE5\xA4\xA7", // 大
    ['l' - 'a'] = "\xE4\xB8\xAD", // 中
    ['m' - 'a'] = "\xE4\xB8\x80", // 一
    ['n' - 'a'] = "\xE5\xBC\x93", // 弓
    ['o' - 'a'] = "\xE4\xBA\xBA", // 人
    ['p' - 'a'] = "\xE5\xBF\x83", // 心
    ['q' - 'a'] = "\xE6\x89\x8B", // 手
    ['r' - 'a'] = "\xE5\x8F\xA3", // 口
    ['s' - 'a'] = "\xE5\xB0\xB8", // 尸
    ['t' - 'a'] = "\xE5\xBB\xBF", // 廿
    ['u' - 'a'] = "\xE5\xB1\xB1", // 山
    ['v' - 'a'] = "\xE5\xA5\xB3", // 女
    ['w' - 'a'] = "\xE7\x94\xB0", // 田
    ['x' - 'a'] = "\xE9\x9B\xA3", // 難
    ['y' - 'a'] = "\xE5\x8D\x9C", // 卜
    ['z' - 'a'] = "\xEF\xBC\xBA", // Z (U+FF3A, fullwidth)
};
68 
/*
 * Emit the "OR " separator that goes between two filter clauses.
 *
 * While *first is non-zero no clause has been written yet, so nothing is
 * appended and the flag is simply cleared. On every later call "OR " is
 * appended to query, which must be NUL-terminated and have room for it.
 */
static void strcat_or_operator(uint32_t *first, char *query) {
    if (*first) {
        // First clause: no separator needed, just remember we are past it
        *first = 0;
        return;
    }

    strcat(query, "OR ");
}
76 
/**
 * Build the SQL fragment restricting results to the character families
 * selected in cj->filter_flags.
 *
 * @param cj     Instance whose filter_flags are read.
 * @param query  Receives a newly calloc()ed string which the caller must
 *               free(). It is the empty string when no flag is set (no
 *               filtering), otherwise " AND ( <col> = 1 OR ... ) ".
 *
 * @return CANGJIE_OK on success, CANGJIE_NOMEM if the allocation fails (in
 *         which case *query is NULL).
 */
int cangjie_get_filter_query(Cangjie *cj, char **query) {
    uint32_t first = 1;

    if (cj->filter_flags == 0) {
        // No filter means pass all, so let's return an empty string
        *query = calloc(1, sizeof(char));
        // Check the allocation itself, not the address of the out-parameter
        if (*query == NULL) {
            return CANGJIE_NOMEM;
        }

        return CANGJIE_OK;
    }

    *query = calloc(MAX_LEN_FILTER_QUERY + 1, sizeof(char));
    if (*query == NULL) {
        return CANGJIE_NOMEM;
    }

    // NOTE(review): if filter_flags only contains bits that are not tested
    // below, the result is " AND ( ) " -- confirm callers cannot pass that.
    strcat(*query, " AND ( ");

    if (cj->filter_flags & CANGJIE_FILTER_BIG5) {
        // Always the first clause when present, so it never needs an "OR "
        strcat(*query, "big5 = 1 ");
        first = 0;
    }

    if (cj->filter_flags & CANGJIE_FILTER_HKSCS) {
        strcat_or_operator(&first, *query);
        strcat(*query, "hkscs = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_PUNCTUATION) {
        strcat_or_operator(&first, *query);
        strcat(*query, "punct = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_CHINESE) {
        strcat_or_operator(&first, *query);
        strcat(*query, "zh = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_ZHUYIN) {
        strcat_or_operator(&first, *query);
        strcat(*query, "zhuyin = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_KANJI) {
        strcat_or_operator(&first, *query);
        strcat(*query, "kanji = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_KATAKANA) {
        strcat_or_operator(&first, *query);
        strcat(*query, "katakana = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_HIRAGANA) {
        strcat_or_operator(&first, *query);
        strcat(*query, "hiragana = 1 ");
    }

    if (cj->filter_flags & CANGJIE_FILTER_SYMBOLS) {
        strcat_or_operator(&first, *query);
        strcat(*query, "symbol = 1 ");
    }

    strcat(*query, ") ");

    return CANGJIE_OK;
}
145 
/**
 * Create a Cangjie instance: record the version and filter flags, pre-build
 * the two query templates and open the database read-only.
 *
 * The database path is taken from the CANGJIE_DB environment variable when
 * it is set, and from the compile-time CANGJIE_DB value otherwise.
 *
 * @param cj            Receives the new instance on success; not written on
 *                      failure.
 * @param version       Stored in the instance.
 * @param filter_flags  Stored in the instance and turned into the filter
 *                      clause of the main query.
 *
 * @return CANGJIE_OK on success, CANGJIE_NOMEM on allocation failure,
 *         CANGJIE_DBOPEN if SQLite reports SQLITE_CANTOPEN, or the raw
 *         SQLite code for any other open failure.
 */
int cangjie_new(Cangjie        **cj,
                CangjieVersion   version,
                CangjieFilter    filter_flags) {
    const size_t base_len = strlen(BASE_QUERY);
    const char *db_path;
    char *filter_sql;
    Cangjie *self;
    int rc;

    self = calloc(1, sizeof(Cangjie));
    if (self == NULL) {
        return CANGJIE_NOMEM;
    }

    self->version = version;
    self->filter_flags = filter_flags;

    // Main query template: base query followed by the filter clause
    self->cj_query = calloc(base_len + MAX_LEN_FILTER_QUERY + 1, sizeof(char));
    if (self->cj_query == NULL) {
        cangjie_free(self);
        return CANGJIE_NOMEM;
    }
    strcpy(self->cj_query, BASE_QUERY);

    rc = cangjie_get_filter_query(self, &filter_sql);
    if (rc != CANGJIE_OK) {
        cangjie_free(self);
        return rc;
    }
    strcat(self->cj_query, filter_sql);
    free(filter_sql);

    // Short code query template: base query followed by an exact code match
    self->shortcode_query = calloc(base_len + MAX_LEN_CODE_QUERY + 1,
                                   sizeof(char));
    if (self->shortcode_query == NULL) {
        cangjie_free(self);
        return CANGJIE_NOMEM;
    }
    strcpy(self->shortcode_query, BASE_QUERY);
    strcat(self->shortcode_query, "AND code = '%q';");

    // The CANGJIE_DB env var overrides the built-in path (useful for local
    // testing)
    db_path = getenv("CANGJIE_DB");
    if (db_path == NULL) {
        db_path = CANGJIE_DB;
    }

    rc = sqlite3_open_v2(db_path, &self->db, SQLITE_OPEN_READONLY, NULL);
    if (rc != SQLITE_OK) {
        cangjie_free(self);
        // FIXME: Unhandled error codes (everything but SQLITE_CANTOPEN is
        // passed through as-is)
        return (rc == SQLITE_CANTOPEN) ? CANGJIE_DBOPEN : rc;
    }

    *cj = self;

    return CANGJIE_OK;
}
210 
/**
 * Look up the characters matching a Cangjie input code.
 *
 * @param cj          Instance providing the database handle, the version and
 *                    the pre-built query template.
 * @param input_code  Code of 1 to 5 bytes. A '*' may appear anywhere except
 *                    the first and last position; when one is present the
 *                    code is matched with GLOB instead of '='.
 * @param l           On CANGJIE_OK, receives the result list. Each row is
 *                    prepended, so the list is in reverse row order. Not
 *                    written on any other return value.
 *
 * @return CANGJIE_OK on success; CANGJIE_INVALID for a rejected input_code;
 *         CANGJIE_NOMEM on allocation failure; CANGJIE_NOCHARS when no row
 *         matches; CANGJIE_DBERROR if stepping the statement fails; the raw
 *         SQLite code if preparing the statement fails; or the error returned
 *         by cangjie_char_new() / cangjie_char_list_prepend().
 */
int cangjie_get_characters(Cangjie          *cj,
                           char             *input_code,
                           CangjieCharList **l) {
    CangjieCharList *tmp = NULL;
    sqlite3_stmt *stmt = NULL;
    char *cj_query;
    char *query;
    size_t input_len;
    int ret;

    if (input_code == NULL) {
        return CANGJIE_INVALID;
    }

    input_len = strlen(input_code);
    if (input_len == 0 || input_len > 5) {
        return CANGJIE_INVALID;
    }

    if (input_code[0] == '*' || input_code[input_len - 1] == '*') {
        return CANGJIE_INVALID;
    }

    // Start with the Cangjie instance's cj_query
    cj_query = calloc(strlen(cj->cj_query) + MAX_LEN_CODE_QUERY + 1,
                      sizeof(char));
    if (cj_query == NULL) {
        return CANGJIE_NOMEM;
    }

    strcpy(cj_query, cj->cj_query);

    // Handle optional wildcards
    if (strchr(input_code, '*') == NULL) {
        strcat(cj_query, "AND code = '%q';");
    } else {
        strcat(cj_query, "AND code GLOB '%q';");
    }

    // input_code is already known to be at most 5 bytes, so it can be passed
    // directly without a bounded copy
    query = sqlite3_mprintf(cj_query, cj->version, input_code);
    free(cj_query);

    if (query == NULL) {
        return CANGJIE_NOMEM;
    }

    ret = sqlite3_prepare_v2(cj->db, query, -1, &stmt, 0);
    // Release the formatted SQL on both the success and the failure path
    sqlite3_free(query);
    if (ret != SQLITE_OK) {
        // FIXME: Unhandled error codes
        return ret;
    }

    // NOTE(review): on the error exits below the partially built list in tmp
    // (and, if prepending fails, the new character) is not released; no
    // destructor for them is visible in this file.
    while ((ret = sqlite3_step(stmt)) == SQLITE_ROW) {
        char *chchar = (char *)sqlite3_column_text(stmt, 0);
        char *code = (char *)sqlite3_column_text(stmt, 1);
        uint32_t frequency = (uint32_t)sqlite3_column_int(stmt, 2);
        CangjieChar *c;
        int rc;

        rc = cangjie_char_new(&c, chchar, code, frequency);
        if (rc != CANGJIE_OK) {
            sqlite3_finalize(stmt);
            return rc;
        }

        rc = cangjie_char_list_prepend(&tmp, c);
        if (rc != CANGJIE_OK) {
            sqlite3_finalize(stmt);
            return rc;
        }
    }

    // The statement is finished with whether the loop ended normally or not
    sqlite3_finalize(stmt);

    if (ret != SQLITE_DONE) {
        // Some error encountered
        return CANGJIE_DBERROR;
    }

    if (tmp == NULL) {
        return CANGJIE_NOCHARS;
    }

    *l = tmp;

    return CANGJIE_OK;
}
307 
/**
 * Look up the characters attached to a one-byte short code.
 *
 * The instance's shortcode_query template is formatted with 0 for its
 * version placeholder and input_code for the code.
 *
 * @param cj          Instance providing the database handle and the
 *                    pre-built short code query template.
 * @param input_code  String of exactly one byte.
 * @param l           On CANGJIE_OK, receives the result list. Each row is
 *                    prepended, so the list is in reverse row order. Not
 *                    written on any other return value.
 *
 * @return CANGJIE_OK on success; CANGJIE_INVALID for a rejected input_code;
 *         CANGJIE_NOMEM if the query cannot be formatted; CANGJIE_NOCHARS
 *         when no row matches; CANGJIE_DBERROR if stepping the statement
 *         fails; the raw SQLite code if preparing the statement fails; or the
 *         error returned by cangjie_char_new() / cangjie_char_list_prepend().
 */
int cangjie_get_characters_by_shortcode(Cangjie          *cj,
                                        char             *input_code,
                                        CangjieCharList **l) {
    CangjieCharList *tmp = NULL;
    sqlite3_stmt *stmt = NULL;
    char *query;
    int ret;

    if (input_code == NULL || strlen(input_code) != 1) {
        return CANGJIE_INVALID;
    }

    query = sqlite3_mprintf(cj->shortcode_query, 0, input_code);
    if (query == NULL) {
        return CANGJIE_NOMEM;
    }

    ret = sqlite3_prepare_v2(cj->db, query, -1, &stmt, 0);
    // Release the formatted SQL on both the success and the failure path
    sqlite3_free(query);
    if (ret != SQLITE_OK) {
        // FIXME: Unhandled error codes
        return ret;
    }

    // NOTE(review): on the error exits below the partially built list in tmp
    // (and, if prepending fails, the new character) is not released; no
    // destructor for them is visible in this file.
    while ((ret = sqlite3_step(stmt)) == SQLITE_ROW) {
        char *chchar = (char *)sqlite3_column_text(stmt, 0);
        uint32_t frequency = (uint32_t)sqlite3_column_int(stmt, 2);
        CangjieChar *c;
        int rc;

        // The caller's short code, not the code column, is stored with the
        // character
        rc = cangjie_char_new(&c, chchar, input_code, frequency);
        if (rc != CANGJIE_OK) {
            sqlite3_finalize(stmt);
            return rc;
        }

        rc = cangjie_char_list_prepend(&tmp, c);
        if (rc != CANGJIE_OK) {
            sqlite3_finalize(stmt);
            return rc;
        }
    }

    // The statement is finished with whether the loop ended normally or not
    sqlite3_finalize(stmt);

    if (ret != SQLITE_DONE) {
        // Some error encountered
        return CANGJIE_DBERROR;
    }

    if (tmp == NULL) {
        return CANGJIE_NOCHARS;
    }

    *l = tmp;

    return CANGJIE_OK;
}
369 
/**
 * Translate an input key into the radical it stands for.
 *
 * @param cj       Not used by this function.
 * @param key      A lowercase letter 'a'..'z', or the wildcard '*'.
 * @param radical  Receives a pointer to the radical's string. The string is
 *                 a literal or an entry of cangjie_radicals, not a fresh
 *                 allocation, so the caller must not free it. Not written
 *                 when the key is rejected.
 *
 * @return CANGJIE_OK on success, CANGJIE_INVALID for any other key.
 */
int cangjie_get_radical(Cangjie     *cj,
                        const char   key,
                        char       **radical) {
    if (key == '*') {
        // The wildcard maps to itself
        *radical = "*";
        return CANGJIE_OK;
    }

    if (key >= 'a' && key <= 'z') {
        // The actual Cangjie radicals, stored in key order
        *radical = (char *)cangjie_radicals[key - 'a'];
        return CANGJIE_OK;
    }

    return CANGJIE_INVALID;
}
387 
/**
 * Tell whether a key is one of the input keys 'a'..'z'.
 *
 * @param cj   Not used by this function.
 * @param key  The key to test.
 *
 * @return CANGJIE_OK if key is a lowercase letter from 'a' to 'z',
 *         CANGJIE_INVALID otherwise.
 */
int cangjie_is_input_key(Cangjie    *cj,
                         const char  key) {
    return (key >= 'a' && key <= 'z') ? CANGJIE_OK : CANGJIE_INVALID;
}
396 
/**
 * Release a Cangjie instance: close its database handle, then free its two
 * query templates and the instance itself.
 *
 * Passing NULL is allowed and does nothing, mirroring free(NULL). A
 * partially initialised instance (members still NULL) is handled too, which
 * cangjie_new() relies on for its error paths.
 *
 * @param cj  Instance to release, or NULL.
 *
 * @return Always CANGJIE_OK.
 */
int cangjie_free(Cangjie *cj) {
    if (cj == NULL) {
        return CANGJIE_OK;
    }

    // NOTE(review): the result of sqlite3_close() is ignored
    sqlite3_close(cj->db);
    free(cj->cj_query);
    free(cj->shortcode_query);
    free(cj);

    return CANGJIE_OK;
}
405