1 /***************************************************************************
2  *   Copyright (C) 2012~2012 by Yichao Yu                                  *
3  *   yyc1992@gmail.com                                                     *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.              *
19  ***************************************************************************/
20 
#include <stddef.h>

#include "fcitx-utils/utils.h"
#include "fcitx-utils/memory.h"
#include "pinyin-enhance-stroke.h"
24 
/**
 * Index of the table slot used by a one-stroke key.
 *
 * A function-like macro with the same name is defined right below; it pads
 * the argument list with a literal 0 and then calls this function through
 * its parenthesized name (which suppresses the macro expansion).
 **/
static inline uint32_t
py_enhance_single_offset(uint8_t i)
{
    const uint32_t slot = i;
    return slot;
}
/* Keep the first real argument, drop the padding that follows it. */
#define _py_enhance_single_offset(dummy, i, ...)        \
    (py_enhance_single_offset)(i)
/* Append a 0 so that the helper above always receives enough arguments. */
#define py_enhance_single_offset(args...)       \
    _py_enhance_single_offset(NULL, ##args, 0)
/* Address of the one-stroke slot inside the table of tree t. */
#define py_enhance_get_single(t, args...)                               \
    (((PyEnhanceStrokeTree*)(t))->table + py_enhance_single_offset(args))
36 
/**
 * Index of the table slot used by a two-stroke key. The five one-stroke
 * slots come first, followed by five slots for every first stroke.
 *
 * The macro of the same name defined below pads missing trailing arguments
 * with 0, so calling it with only the first stroke yields the first slot of
 * that stroke's group.
 **/
static inline uint32_t
py_enhance_double_offset(uint8_t i1, uint8_t i2)
{
    const uint32_t group = 5 + (uint32_t)i1 * 5;
    return group + i2;
}
/* Keep the first two real arguments, drop the padding that follows them. */
#define _py_enhance_double_offset(dummy, i1, i2, ...)   \
    (py_enhance_double_offset)(i1, i2)
/* Append a 0 so that an omitted second stroke defaults to 0. */
#define py_enhance_double_offset(args...)       \
    _py_enhance_double_offset(NULL, ##args, 0)
/* Address of the two-stroke slot inside the table of tree t. */
#define py_enhance_get_double(t, args...)                               \
    (((PyEnhanceStrokeTree*)(t))->table + py_enhance_double_offset(args))
48 
/**
 * Index of the table slot that heads the key list of a three-stroke prefix.
 * These slots follow the 5 one-stroke and the 5 * 5 two-stroke slots.
 *
 * The macro of the same name defined below pads missing trailing arguments
 * with 0, so calling it with two strokes yields the first slot of the group
 * that shares those two strokes.
 **/
static inline uint32_t
py_enhance_multiple_offset(uint8_t i1, uint8_t i2, uint8_t i3)
{
    const uint32_t base = 5 + 5 * 5;
    const uint32_t prefix = (uint32_t)i1 * 5 * 5 + (uint32_t)i2 * 5 + i3;
    return base + prefix;
}
/* Keep the first three real arguments, drop the padding that follows them. */
#define _py_enhance_multiple_offset(dummy, i1, i2, i3, ...)     \
    (py_enhance_multiple_offset)(i1, i2, i3)
/* Append a 0 so that an omitted third stroke defaults to 0. */
#define py_enhance_multiple_offset(args...)       \
    _py_enhance_multiple_offset(NULL, ##args, 0)
/* Address of the three-stroke-prefix slot inside the table of tree t. */
#define py_enhance_get_multiple(t, args...)                               \
    (((PyEnhanceStrokeTree*)(t))->table + py_enhance_multiple_offset(args))
60 
/**
 * One node of the singly-linked list of keys that share the same first three
 * strokes. Nodes are stored in tree->keys and are addressed by their byte
 * offset in that buffer (the key id).
 **/
typedef struct {
    /**
     * same as word->next: while loading this holds (key id + 2); after
     * py_enhance_stroke_load_finish() it is the id of the first word in the
     * list of words for this key.
     **/
    uint32_t words;
    /**
     * id of the next key in the list.
     * next % 2 != 0: end (py_enhance_stroke_id_to_key() returns NULL for any
     * id that is not a multiple of 4).
     **/
    uint32_t next;
    /* number of strokes stored in key[] (strokes after the first three) */
    uint8_t key_l;
    /* the first three strokes packed as s0 * 25 + s1 * 5 + s2 */
    uint8_t prefix;
    /* variable-length tail; key_l bytes are allocated starting here */
    uint8_t key[1];
} PyEnhanceStrokeKey;
74 
75 static inline PyEnhanceStrokeKey*
_py_enhance_stroke_id_to_key(const PyEnhanceStrokeTree * tree,uint32_t id)76 _py_enhance_stroke_id_to_key(const PyEnhanceStrokeTree *tree, uint32_t id)
77 {
78     return (PyEnhanceStrokeKey*)(tree->keys.data + id);
79 }
80 
81 static inline PyEnhanceStrokeKey*
py_enhance_stroke_id_to_key(const PyEnhanceStrokeTree * tree,uint32_t id)82 py_enhance_stroke_id_to_key(const PyEnhanceStrokeTree *tree, uint32_t id)
83 {
84     if (id % 4 != 0)
85         return NULL;
86     return _py_enhance_stroke_id_to_key(tree, id);
87 }
88 
89 static inline PyEnhanceStrokeKey*
py_enhance_stroke_key_next(const PyEnhanceStrokeTree * tree,const PyEnhanceStrokeKey * k)90 py_enhance_stroke_key_next(const PyEnhanceStrokeTree *tree,
91                            const PyEnhanceStrokeKey *k)
92 {
93     return py_enhance_stroke_id_to_key(tree, k->next);
94 }
95 
96 static inline void
py_enhance_stroke_key_tonext(const PyEnhanceStrokeTree * tree,const PyEnhanceStrokeKey ** k)97 py_enhance_stroke_key_tonext(const PyEnhanceStrokeTree *tree,
98                              const PyEnhanceStrokeKey **k)
99 {
100     *k = py_enhance_stroke_key_next(tree, *k);
101 }
102 
103 #define PY_ENHANCE_STROKE_KEY_REAL_SIZE                 \
104     (((void*)((PyEnhanceStrokeKey*)NULL)->key) - NULL)
105 
106 static inline uint32_t
py_enhance_stroke_alloc_key(PyEnhanceStrokeTree * tree,const uint8_t * key_s,uint8_t key_l,PyEnhanceStrokeKey ** key_p)107 py_enhance_stroke_alloc_key(PyEnhanceStrokeTree *tree, const uint8_t *key_s,
108                             uint8_t key_l, PyEnhanceStrokeKey **key_p)
109 {
110     uint32_t size = PY_ENHANCE_STROKE_KEY_REAL_SIZE + key_l;
111     uint32_t id = py_enhance_buff_alloc(&tree->keys, size);
112     PyEnhanceStrokeKey *key = _py_enhance_stroke_id_to_key(tree, id);
113     key->key_l = key_l;
114     if (key_l)
115         memcpy(key->key, key_s, key_l);
116     *key_p = key;
117     return id;
118 }
119 
120 static inline uint32_t
py_enhance_stroke_alloc_word(PyEnhanceStrokeTree * tree,const char * word_s,uint8_t word_l,PyEnhanceStrokeWord ** word_p)121 py_enhance_stroke_alloc_word(PyEnhanceStrokeTree *tree, const char *word_s,
122                              uint8_t word_l, PyEnhanceStrokeWord **word_p)
123 {
124     uint32_t id;
125     id = py_enhance_buff_alloc(&tree->words, sizeof(PyEnhanceStrokeWord));
126     PyEnhanceStrokeWord *word = _py_enhance_stroke_id_to_word(tree, id);
127     memcpy(word->word, word_s, word_l);
128     word->word[word_l] = '\0';
129     *word_p = word;
130     return id;
131 }
132 
/**
 * Map a stroke letter ('h', 's', 'p', 'n', 'z') to its number (0..4).
 *
 * @return the stroke number, or (uint8_t)-1 for any other character.
 **/
static inline uint8_t
py_enhance_stroke_sym_to_num(char c)
{
    static const char symbols[5] = {'h', 's', 'p', 'n', 'z'};
    uint8_t num;
    for (num = 0; num < 5; num++) {
        if (symbols[num] == c)
            return num;
    }
    return (uint8_t)-1;
}
150 
/**
 * Cursor over the key list of one three-stroke prefix, used by
 * py_enhance_stroke_get_match_keys() while scanning for approximate matches.
 **/
typedef struct {
    /* current node of the list; NULL once the list is exhausted */
    const PyEnhanceStrokeKey *key;
    /* typed strokes that are compared against the keys of this list */
    const uint8_t *key_s;
    /* number of mismatches already charged for choosing this prefix */
    int diff;
    /* number of strokes in key_s */
    int key_l;
} PyEnhanceStrokeKeyLookup;
157 
/**
 * One entry of the result array kept ordered by distance in
 * py_enhance_stroke_get_match_keys().
 **/
typedef struct {
    /* copy of the matched key's `words` field (id of its word list) */
    uint32_t word;
    /* weighted distance between the typed strokes and the matched key */
    int distance;
} PyEnhanceStrokeResult;
162 
#define REPLACE_WEIGHT 5
#define INSERT_WEIGHT 5
#define REMOVE_WEIGHT 5
#define EXCHANGE_WEIGHT 5
#define END_WEIGHT 1
/**
 * Weighted edit distance between the typed strokes and a dictionary key.
 *
 * The two sequences are walked in parallel with one stroke of look-ahead.
 * A mismatch is classified as a replaced stroke, two exchanged strokes, an
 * extra typed stroke ("remove") or a missing typed stroke ("insert"). At
 * most word_len / 3 edits are tolerated, of which at most
 * (word_len - 2) / 3 may be removals. Dictionary strokes left over after the
 * typed strokes are used up cost END_WEIGHT each.
 *
 * @param word_p    typed strokes.
 * @param word_len  number of typed strokes.
 * @param dict_p    strokes of the dictionary key.
 * @param dict_len  number of strokes in the dictionary key.
 * @return the distance (>= 0), or -1 when the key does not match.
 **/
static inline int
py_enhance_stroke_get_distance(const uint8_t *word_p, int word_len,
                               const uint8_t *dict_p, int dict_len)
{
    int replace = 0;
    int insert = 0;
    int remove = 0;
    int exchange = 0;
    int diff = 0;
    int word_i = 0;
    int dict_i = 0;
    const int maxdiff = word_len / 3;
    const int maxremove = (word_len - 2) / 3;
    while ((diff = replace + insert + remove + exchange) <= maxdiff &&
           remove <= maxremove) {
        if (word_i >= word_len) {
            return ((replace * REPLACE_WEIGHT + insert * INSERT_WEIGHT
                     + remove * REMOVE_WEIGHT + exchange * EXCHANGE_WEIGHT)
                    + (dict_len - dict_i) * END_WEIGHT);
        }
        if (dict_i >= dict_len) {
            /* only a single trailing typed stroke may be dropped */
            if (word_i + 1 < word_len)
                return -1;
            remove++;
            if (diff + 1 <= maxdiff && remove <= maxremove) {
                return (replace * REPLACE_WEIGHT + insert * INSERT_WEIGHT
                        + remove * REMOVE_WEIGHT + exchange * EXCHANGE_WEIGHT);
            }
            return -1;
        }
        if (word_p[word_i] == dict_p[dict_i]) {
            word_i++;
            dict_i++;
            continue;
        }
        /* never look at a stroke behind the end of either sequence */
        const int word_has_next = word_i + 1 < word_len;
        const int dict_has_next = dict_i + 1 < dict_len;
        if (!word_has_next && !dict_has_next) {
            replace++;
            word_i++;
            dict_i++;
            continue;
        }
        if (word_has_next && dict_has_next &&
            word_p[word_i + 1] == dict_p[dict_i + 1]) {
            replace++;
            dict_i += 2;
            word_i += 2;
            continue;
        }
        if (word_has_next && word_p[word_i + 1] == dict_p[dict_i]) {
            /*
             * the stroke pair is exchanged only if the current typed stroke
             * also equals the next dictionary stroke; this has to be tested
             * before the typed position is advanced.
             */
            if (dict_has_next && word_p[word_i] == dict_p[dict_i + 1]) {
                word_i += 2;
                dict_i += 2;
                exchange++;
                continue;
            }
            /* otherwise the current typed stroke is an extra one */
            word_i += 2;
            dict_i++;
            remove++;
            continue;
        }
        if (dict_has_next && word_p[word_i] == dict_p[dict_i + 1]) {
            word_i++;
            dict_i += 2;
            insert++;
            continue;
        }
        break;
    }
    /* callers test for a negative value, so the sentinel must be an int -1 */
    return -1;
}
238 
239 int
py_enhance_stroke_get_match_keys(PinyinEnhance * pyenhance,const char * key_s,int key_l,PyEnhanceStrokeWord ** word_buff,int buff_len)240 py_enhance_stroke_get_match_keys(
241     PinyinEnhance *pyenhance, const char *key_s, int key_l,
242     PyEnhanceStrokeWord **word_buff, int buff_len)
243 {
244     int i;
245     int count = 0;
246     uint8_t *key_buff = malloc(key_l);
247     for (i = 0;i < key_l;i++) {
248         key_buff[i] = py_enhance_stroke_sym_to_num(key_s[i]);
249         if (fcitx_unlikely(key_buff[i] == (uint8_t)-1)) {
250             goto out;
251         }
252     }
253     const PyEnhanceStrokeTree *tree = &pyenhance->stroke_tree;
254     if (buff_len > 16)
255         buff_len = 16;
256     switch (key_l) {
257     case 1: {
258         uint32_t tmp_word = *py_enhance_get_single(tree, key_buff[0]);
259         if (tmp_word % 4 == 0) {
260             word_buff[0] = _py_enhance_stroke_id_to_word(tree, tmp_word);
261             count++;
262             if (count >= buff_len) {
263                 goto out;
264             }
265         }
266         const uint32_t *tmp_word_p = py_enhance_get_double(tree, key_buff[0]);
267         int left = buff_len - count;
268         if (left > 5)
269             left = 5;
270         for (i = 0;i < left;i++) {
271             word_buff[count + i] = _py_enhance_stroke_id_to_word(
272                 tree, tmp_word_p[i]);
273         }
274         count += left;
275         goto out;
276     }
277     case 2: {
278         uint32_t tmp_word;
279         tmp_word = *py_enhance_get_double(tree, key_buff[0], key_buff[1]);
280         if (tmp_word % 4 == 0) {
281             word_buff[0] = _py_enhance_stroke_id_to_word(tree, tmp_word);
282             count++;
283             if (count >= buff_len) {
284                 goto out;
285             }
286         }
287         const uint32_t *tmp_key_p;
288         const PyEnhanceStrokeKey *tmp_key;
289         tmp_key_p = py_enhance_get_multiple(tree, key_buff[0], key_buff[1]);
290         int left = buff_len - count;
291         if (left > 5)
292             left = 5;
293         for (i = 0;i < left;i++) {
294             tmp_key = py_enhance_stroke_id_to_key(tree, tmp_key_p[i]);
295             /**
296              * skip if there the prefix has no keys or the shortest key in the
297              * series is longer than 3 (i.e. > 0 after the prefix is removed.)
298              **/
299             if (!tmp_key || tmp_key->key_l)
300                 continue;
301             word_buff[count] = py_enhance_stroke_id_to_word(tree,
302                                                             tmp_key->words);
303             count++;
304         }
305         goto out;
306     }
307     default:
308         break;
309     }
310     // maximum size from (key_buff[0] == i || key_buff[1] == j)
311     PyEnhanceStrokeKeyLookup lookup[(5 + 5 - 1) * 5];
312     const PyEnhanceStrokeKey *tmp_key;
313     uint32_t tmp_key_id;
314     int lookup_c = 0;
315     int j;
316     int k;
317     uint8_t *key_p;
318     key_p = key_buff + 3;
319     key_l -= 3;
320     for (i = 0;i < 5;i++) {
321         for (j = 0;j < 5;j++) {
322             boolean diff0 = key_buff[0] != i;
323             boolean diff1 = key_buff[1] != j;
324             if (diff0 && diff1)
325                 continue;
326             for (k = 0;k < 5;k++) {
327                 tmp_key_id = *py_enhance_get_multiple(tree, i, j, k);
328                 if (tmp_key_id % 4 != 0)
329                     continue;
330                 tmp_key = _py_enhance_stroke_id_to_key(tree, tmp_key_id);
331                 PyEnhanceStrokeKeyLookup *lookup_p = lookup + lookup_c;
332                 if (key_buff[2] == k) {
333                     lookup_p->key = tmp_key;
334                     lookup_p->key_s = key_p;
335                     lookup_p->key_l = key_l;
336                     lookup_p->diff = diff0 + diff1;
337                     lookup_c++;
338                     continue;
339                 }
340                 if (diff0 || diff1)
341                     continue;
342                 lookup_p->key = tmp_key;
343                 lookup_p->key_s = key_p - 1;
344                 lookup_p->key_l = key_l + 1;
345                 lookup_p->diff = 1;
346                 lookup_c++;
347             }
348         }
349     }
350     int cur_len = key_l * 2 / 3;
351     PyEnhanceStrokeResult res_buff[16];
352     while (lookup_c > 0 && (count < buff_len || cur_len <= key_l + 4)) {
353         /**
354          * check keys of certain length for all prefix.
355          **/
356         for (i = 0;i < lookup_c;i++) {
357             PyEnhanceStrokeKeyLookup *lookup_p = lookup + i;
358             /**
359              * remove a prefix if it is already reached the end.
360              **/
361             if (!lookup_p->key) {
362                 lookup_c--;
363                 memmove(lookup_p, lookup_p + 1,
364                         sizeof(PyEnhanceStrokeKeyLookup) * (lookup_c - i));
365                 i--;
366                 continue;
367             }
368             /**
369              * skip keys shorter than the current length.
370              **/
371             while (lookup_p->key && lookup_p->key->key_l < cur_len) {
372                 py_enhance_stroke_key_tonext(tree, &lookup_p->key);
373             }
374             for (;lookup_p->key && lookup_p->key->key_l == cur_len;
375                  py_enhance_stroke_key_tonext(tree, &lookup_p->key)) {
376                 int distance = py_enhance_stroke_get_distance(
377                     lookup_p->key_s, lookup_p->key_l,
378                     lookup_p->key->key, lookup_p->key->key_l);
379                 if (distance < 0)
380                     continue;
381                 distance += lookup_p->diff * REPLACE_WEIGHT;
382                 /**
383                  * insert in the ordered result array.
384                  **/
385                 for (j = 0;j < count;j++) {
386                     if (distance < res_buff[j].distance) {
387                         break;
388                     }
389                 }
390                 if (count < buff_len) {
391                     count++;
392                 } else if (j >= count) {
393                     continue;
394                 }
395                 PyEnhanceStrokeResult *pos = res_buff + j;
396                 int move_size = count - j - 1;
397                 if (move_size > 0) {
398                     memmove(pos + 1, pos,
399                             move_size * sizeof(PyEnhanceStrokeResult));
400                 }
401                 pos->word = lookup_p->key->words;
402                 pos->distance = distance;
403             }
404         }
405         cur_len++;
406     }
407     for (j = 0;j < count;j++) {
408         word_buff[j] = py_enhance_stroke_id_to_word(tree, res_buff[j].word);
409     }
410 out:
411     free(key_buff);
412     return count;
413 }
414 
/**
 * Order two byte strings by length first and by content second.
 *
 * @return -1 / 1 when the first string is shorter / longer than the second,
 *         otherwise the result of memcmp() on the equally long strings.
 **/
static inline int
memcmp_len(const void *p1, size_t l1, const void *p2, size_t l2)
{
    if (l1 < l2)
        return -1;
    if (l1 > l2)
        return 1;
    return memcmp(p1, p2, l1);
}
422 
423 /**
424  * Add keys to the Singly-linked lists and add words to the array as well as
425  * recording the key_id in the word added.
426  **/
427 static void
py_enhance_stroke_add_word(PyEnhanceStrokeTree * tree,const uint8_t * key_s,int key_l,const char * word_s,int word_l)428 py_enhance_stroke_add_word(PyEnhanceStrokeTree *tree,
429                            const uint8_t *key_s, int key_l,
430                            const char *word_s, int word_l)
431 {
432     uint32_t key_id;
433     /**
434      * for key_l = 1, 2
435      *     key_id = offset_in_table * 2 + 1   (key_id % 4 = 1, 3)
436      * otherwise
437      *     key_id = offset_in_words_buffer + 2   (key_id % 4 = 2)
438      **/
439     switch (key_l) {
440     case 1:
441         key_id = py_enhance_single_offset(key_s[0]) * 2 + 1;
442         break;
443     case 2:
444         key_id = py_enhance_double_offset(key_s[0], key_s[1]) * 2 + 1;
445         break;
446     default: {
447         uint32_t *key_p;
448         PyEnhanceStrokeKey *key;
449         uint8_t prefix = key_s[0] * 5 * 5 + key_s[1] * 5 + key_s[2];
450         key_p = tree->table + prefix + 5 * 5 + 5;
451         key_id = *key_p;
452         int res;
453         key_s += 3;
454         key_l -= 3;
455         /**
456          * since all the words are ordered, which means res <= 0, the loop
457          * is actually not doing anything, it is here only to make the logic
458          * complete (and in case the data is incorrectly ordered).
459          **/
460         for (;;key_p = &key->next, key_id = *key_p) {
461             key = py_enhance_stroke_id_to_key(tree, key_id);
462             if (!key ||
463                 (res = memcmp_len(key_s, key_l, key->key, key->key_l)) < 0) {
464                 PyEnhanceStrokeKey *new_key;
465                 uint32_t new_id;
466                 new_id = py_enhance_stroke_alloc_key(tree, key_s, key_l,
467                                                      &new_key);
468                 *key_p = new_id;
469                 new_key->words = new_id + 2;
470                 new_key->next = key_id;
471                 new_key->prefix = prefix;
472                 key_id = new_id;
473                 break;
474             } else if (fcitx_likely(res == 0)) {
475                 break;
476             }
477         }
478         key_id += 2;
479     }
480     }
481     PyEnhanceStrokeWord *new_word;
482     py_enhance_stroke_alloc_word(tree, word_s, word_l, &new_word);
483     new_word->next = key_id;
484 }
485 
486 static inline uint32_t*
_py_enhance_stroke_key_get_words(PyEnhanceStrokeTree * tree,uint32_t key_id)487 _py_enhance_stroke_key_get_words(PyEnhanceStrokeTree *tree, uint32_t key_id)
488 {
489     if (key_id % 2 == 0) {
490         return &_py_enhance_stroke_id_to_key(tree, key_id - 2)->words;
491     }
492     return &tree->table[key_id / 2];
493 }
494 
495 #define PY_ENHANCE_STROKE_WORD_ALIGN_SIZE               \
496     fcitx_utils_align_to(sizeof(PyEnhanceStrokeWord),   \
497                          PY_ENHANCE_BUFF_ALIGH)
498 static void
py_enhance_stroke_load_finish(PyEnhanceStrokeTree * tree)499 py_enhance_stroke_load_finish(PyEnhanceStrokeTree *tree)
500 {
501     unsigned int words_l = tree->words.len / PY_ENHANCE_STROKE_WORD_ALIGN_SIZE;
502     /**
503      * sort the word array so that we can use bsearch to find a word later.
504      * half of loading time of the stroke table is spent here.
505      **/
506     /* struct timespec start, end; */
507     /* int t; */
508     /* clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start); */
509     qsort(tree->words.data, words_l, PY_ENHANCE_STROKE_WORD_ALIGN_SIZE,
510           (int (*)(const void*, const void*))strcmp);
511     /* clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end); */
512     /* t = ((end.tv_sec - start.tv_sec) * 1000000000) */
513     /*     + end.tv_nsec - start.tv_nsec; */
514     /* printf("%s, %d\n", __func__, t); */
515     unsigned int i;
516     uint32_t *words;
517     for (i = 0;i < words_l;i++) {
518         PyEnhanceStrokeWord *word;
519         uint32_t offset = PY_ENHANCE_STROKE_WORD_ALIGN_SIZE * i;
520         word = tree->words.data + offset;
521         words = _py_enhance_stroke_key_get_words(tree, word->next);
522         word->next = *words;
523         *words = offset;
524     }
525 }
526 
527 void
py_enhance_stroke_load_tree(PyEnhanceStrokeTree * tree,FILE * fp)528 py_enhance_stroke_load_tree(PyEnhanceStrokeTree *tree, FILE *fp)
529 {
530     char *buff = NULL;
531     char *key;
532     char *word;
533     unsigned int key_l;
534     int word_l;
535     size_t len;
536     memset(tree, 0, sizeof(PyEnhanceStrokeTree));
537     /**
538      * init each entry to the key_id (single and double)
539      * or other odd numbers (multiple).
540      **/
541     unsigned int i;
542     for (i = 0;i < sizeof(tree->table) / sizeof(uint32_t);i++)
543         tree->table[i] = i * 2 + 1;
544     /**
545      * reserve some space before loading to avoid repeating realloc
546      **/
547     py_enhance_buff_reserve(&tree->keys, 1024 * 1024 / 2 * 3);
548     py_enhance_buff_reserve(&tree->words, 1024 * 1024);
549     while (getline(&buff, &len, fp) != -1) {
550         /* remove leading spaces */
551         key = buff + strspn(buff, PYENHANCE_MAP_BLANK);
552         /* empty line or comment */
553         if (*key == '\0' || *key == '#')
554             continue;
555         /* find delimiter */
556         key_l = strspn(key, "12345");
557         if (fcitx_unlikely(key_l == 0 || key_l > 0xff))
558             continue;
559         word = key + key_l;
560         word_l = strspn(word, PYENHANCE_MAP_BLANK);
561         if (!word_l)
562             continue;
563         *word = '\0';
564         word += word_l;
565         word_l = strcspn(word, PYENHANCE_MAP_BLANK);
566         if (fcitx_unlikely(word_l == 0 || word_l > UTF8_MAX_LENGTH))
567             continue;
568         word[word_l] = '\0';
569         word_l++;
570         for (i = 0;i < key_l;i++)
571             key[i] -= '1';
572         py_enhance_stroke_add_word(tree, (uint8_t*)key, key_l, word, word_l);
573     }
574     py_enhance_stroke_load_finish(tree);
575     py_enhance_buff_shrink(&tree->keys);
576     py_enhance_buff_shrink(&tree->words);
577     fcitx_utils_free(buff);
578 }
579 
580 uint8_t*
py_enhance_stroke_find_stroke(PinyinEnhance * pyenhance,const char * str,uint8_t * stroke,unsigned int * len)581 py_enhance_stroke_find_stroke(PinyinEnhance *pyenhance, const char *str,
582                               uint8_t *stroke, unsigned int *len)
583 {
584     const PyEnhanceStrokeTree *tree = &pyenhance->stroke_tree;
585     *len = 0;
586     if (!tree->words.len)
587         goto out;
588     const PyEnhanceStrokeWord *word;
589     word = bsearch(str, tree->words.data,
590                    tree->words.len / PY_ENHANCE_STROKE_WORD_ALIGN_SIZE,
591                    PY_ENHANCE_STROKE_WORD_ALIGN_SIZE,
592                    (int (*)(const void*, const void*))strcmp);
593     if (!word)
594         goto out;
595     while (word->next % 4 == 0)
596         word = _py_enhance_stroke_id_to_word(tree, word->next);
597     uint8_t prefix;
598     if (word->next % 2 == 0) {
599         const PyEnhanceStrokeKey *key;
600         key = _py_enhance_stroke_id_to_key(tree, word->next - 2);
601         *len = 3 + key->key_l;
602         if (!stroke)
603             stroke = malloc(*len);
604         prefix = key->prefix;
605         stroke[0] = prefix / (5 * 5);
606         prefix -= stroke[0] * (5 * 5);
607         stroke[1] = prefix / 5;
608         stroke[2] = prefix - stroke[1] * 5;
609         if (key->key_l) {
610             memcpy(stroke + 3, key->key, key->key_l);
611         }
612     } else {
613         if (!stroke)
614             stroke = malloc(2);
615         if ((prefix = word->next / 2) < 5) {
616             *len = 1;
617             stroke[0] = prefix;
618         } else {
619             *len = 2;
620             prefix -= 5;
621             stroke[1] = prefix / 5;
622             stroke[0] = prefix - stroke[1] * 5;
623         }
624     }
625 out:
626     return stroke;
627 }
628 
629 static const PyEnhanceStrLen*
py_enhance_stroke_get_char(uint8_t s)630 py_enhance_stroke_get_char(uint8_t s)
631 {
632     static const PyEnhanceStrLen stroke_table[] = {
633         PY_STR_LEN("一"),
634         PY_STR_LEN("丨"),
635         PY_STR_LEN("丿"),
636         PY_STR_LEN("㇏"),
637         PY_STR_LEN("��"),
638         PY_STR_LEN("")
639     };
640     if (s >= 5)
641         return stroke_table + 5;
642     return stroke_table + s;
643 }
644 
645 char*
py_enhance_stroke_get_str(const uint8_t * stroke,unsigned int s_l,char * str,unsigned int * len)646 py_enhance_stroke_get_str(const uint8_t *stroke, unsigned int s_l,
647                           char *str, unsigned int *len)
648 {
649     const PyEnhanceStrLen *static_buff[256];
650     void *tofree;
651     const PyEnhanceStrLen **buff;
652     if (fcitx_likely(s_l <= 256)) {
653         tofree = NULL;
654         buff = static_buff;
655     } else {
656         tofree = malloc(sizeof(const PyEnhanceStrLen*) * s_l);
657         buff = tofree;
658     }
659     unsigned int i;
660     *len = 0;
661     for (i = 0;i < s_l;i++) {
662         buff[i] = py_enhance_stroke_get_char(stroke[i]);
663         *len += buff[i]->len;
664     }
665     if (!str)
666         str = malloc(*len + 1);
667     unsigned int accum_len = 0;
668     for (i = 0;i < s_l;i++) {
669         memcpy(str + accum_len, buff[i]->str, buff[i]->len);
670         accum_len += buff[i]->len;
671     }
672     fcitx_utils_free(tofree);
673     str[*len] = '\0';
674     return str;
675 }
676