1 /*
2  * this file is ported from kdelibs/kdeui/kcharselectdata.cpp
3  *
4  * original file is licensed under GPLv2+
5  */
6 
#include <stdint.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libintl.h>
#include <fcitx-utils/uthash.h>
#include <fcitx-utils/utils.h>
#include <fcitx-config/xdg.h>
#include <fcitx/fcitx.h>
#if defined(__linux__) || defined(__GLIBC__)
#include <endian.h>
#else
#include <sys/endian.h>
#endif
#include "charselectdata.h"
20 
/* constants for hangul (de)composition, see UAX #15 */
/* SBase is the code point subtracted from a syllable to get its index. */
#define SBase 0xAC00
#define LBase 0x1100
#define VBase 0x1161
#define TBase 0x11A7
/* LCount/VCount/TCount match the entry counts of the three jamo tables. */
#define LCount 19
#define VCount 21
#define TCount 28
/* NCount: syllables per leading consonant; SCount: total syllable count. */
#define NCount (VCount * TCount)
#define SCount (LCount * NCount)
/*
 * uthash wrappers for tables keyed by a 32-bit code point.
 * HASH_FIND_UNICODE: `findint` is a pointer to the uint32_t key to look up;
 * `out` receives the matching node or NULL.
 * HASH_ADD_UNICODE: `intfield` is the NAME of the uint32_t key member in the
 * node structure, `add` is the node to insert.
 */
#define HASH_FIND_UNICODE(head,findint,out)                                         \
    HASH_FIND(hh,head,findint,sizeof(uint32_t),out)
#define HASH_ADD_UNICODE(head,intfield,add)                                         \
    HASH_ADD(hh,head,intfield,sizeof(uint32_t),add)
35 
/*
 * Node of a uthash-based set of code points. A set is represented by a
 * pointer to its head node; NULL is the empty set.
 */
typedef struct _UnicodeSet {
    uint32_t unicode;   /* the code point; this is the hash key */
    UT_hash_handle hh;  /* uthash bookkeeping handle */
} UnicodeSet;
40 
/*
 * Name fragments used to build "HANGUL SYLLABLE ..." names.
 * Indexed by LIndex (19 entries); an empty entry contributes no letters.
 */
static const char JAMO_L_TABLE[][4] = {
    "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
    "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
};

/* Vowel fragments, indexed by VIndex (21 entries). */
static const char JAMO_V_TABLE[][4] = {
    "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
    "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
    "YU", "EU", "YI", "I"
};

/*
 * Trailing fragments, indexed by TIndex (28 entries); entry 0 is empty and
 * therefore adds nothing to the name.
 */
static const char JAMO_T_TABLE[][4] = {
    "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
    "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
    "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
};
57 
uni_cmp(const void * a,const void * b)58 int uni_cmp(const void* a, const void* b) {
59     const UnicodeSet* sa = a;
60     const UnicodeSet* sb = b;
61     return sa->unicode - sb->unicode;
62 }
63 
pindex_cmp(const void * a,const void * b)64 int pindex_cmp(const void* a, const void* b) {
65     CharSelectDataIndex* const* pa = a;
66     CharSelectDataIndex* const* pb = b;
67 
68     return strcasecmp((*pa)->key, (*pb)->key);
69 }
70 
index_search_cmp(const void * a,const void * b)71 int index_search_cmp(const void* a, const void* b) {
72     const char* s = a;
73     CharSelectDataIndex* const* pb = b;
74 
75     return strcasecmp(s, (*pb)->key);
76 }
77 
index_search_a_cmp(const void * a,const void * b)78 int index_search_a_cmp(const void* a, const void* b) {
79     const char* s = a;
80     CharSelectDataIndex* const* pb = b;
81 
82     int res, len;
83     len = strlen(s);
84     res = strncasecmp(s, (*pb)->key, len);
85     if (res)
86         return res;
87     else
88         return 1;
89 }
90 
/*
 * Forward declarations of helpers that are used before their definitions
 * further down in this file.
 */
/* Splits s into a string list of tokens made of letters, digits and '+'. */
UT_array* SplitString(const char* s);

/* Returns a newly allocated string: prefix followed by code in hex. */
char* FormatCode(uint32_t code, int length, const char* prefix);
/* Returns the set of code points whose index keys start with s. */
UnicodeSet* CharSelectDataGetMatchingChars(CharSelectData* charselect, const char* s);
95 
/*
 * Decodes an unsigned 32-bit little-endian value from the four bytes at d.
 *
 * The bytes are combined explicitly, so the result is the same on every
 * host byte order, d needs no particular alignment, and no non-standard
 * byte-order header is required.
 */
uint32_t FromLittleEndian32(const char* d)
{
    const uint8_t* p = (const uint8_t*) d;

    return (uint32_t) p[0]
           | ((uint32_t) p[1] << 8)
           | ((uint32_t) p[2] << 16)
           | ((uint32_t) p[3] << 24);
}
103 
/*
 * Decodes an unsigned 16-bit little-endian value from the two bytes at d.
 *
 * The bytes are combined explicitly, so the result is the same on every
 * host byte order, d needs no particular alignment, and no non-standard
 * byte-order header is required.
 */
uint16_t FromLittleEndian16(const char* d)
{
    const uint8_t* p = (const uint8_t*) d;

    return (uint16_t) ((unsigned int) p[0] | ((unsigned int) p[1] << 8));
}
111 
CharSelectDataCreate()112 CharSelectData* CharSelectDataCreate()
113 {
114     CharSelectData* charselect = fcitx_utils_new(CharSelectData);
115 
116     do {
117 
118         FILE* fp = FcitxXDGGetFileWithPrefix("data", "charselectdata", "r", NULL);
119         if (!fp)
120             break;
121 
122         fseek(fp, 0, SEEK_END);
123         long int size = ftell(fp);
124         fseek(fp, 0, SEEK_SET);
125 
126         charselect->size = size;
127         charselect->dataFile = fcitx_utils_malloc0(size);
128         fread(charselect->dataFile, 1, size, fp);
129 
130         fclose(fp);
131 
132         CharSelectDataCreateIndex(charselect);
133 
134         return charselect;
135     } while(0);
136 
137     free(charselect);
138     return NULL;
139 }
140 
CharSelectDataUnihanInfo(CharSelectData * charselect,uint32_t unicode)141 UT_array* CharSelectDataUnihanInfo(CharSelectData* charselect, uint32_t unicode)
142 {
143     UT_array* res = fcitx_utils_new_string_list();
144 
145     const char* data = charselect->dataFile;
146     const uint32_t offsetBegin = FromLittleEndian32(data+36);
147     const uint32_t offsetEnd = charselect->size;
148 
149     int min = 0;
150     int mid;
151     int max = ((offsetEnd - offsetBegin) / 32) - 1;
152 
153     while (max >= min) {
154         mid = (min + max) / 2;
155         const uint32_t midUnicode = FromLittleEndian16(data + offsetBegin + mid*32);
156         if (unicode > midUnicode)
157             min = mid + 1;
158         else if (unicode < midUnicode)
159             max = mid - 1;
160         else {
161             int i;
162             for(i = 0; i < 7; i++) {
163                 uint32_t offset = FromLittleEndian32(data + offsetBegin + mid*32 + 4 + i*4);
164                 const char* empty = "";
165                 if(offset != 0) {
166                     const char* r = data + offset;
167                     utarray_push_back(res, &r);
168                 } else {
169                     utarray_push_back(res, &empty);
170                 }
171             }
172             return res;
173         }
174     }
175 
176     return res;
177 }
178 
/*
 * Finds the details record of `unicode` by binary search.
 *
 * The details table spans the byte range given by the header words at
 * offsets 12 and 16 and consists of 29-byte records that start with the
 * code point. The code point is read as a 32-bit value: the next field of
 * the record starts at +4 and CharSelectDataCreateIndex reads the same
 * field with FromLittleEndian32. Reading only 16 bits truncated code
 * points above U+FFFF.
 *
 * Returns the byte offset of the record inside the data file, or 0 when the
 * code point has no details record.
 *
 * The most recent lookup is cached in static variables. An explicit
 * validity flag keeps the zero-initialised cache from answering a query for
 * code point 0 before any search has run.
 * NOTE(review): the cache is not keyed on `charselect`; that is only
 * correct while all instances share the same data -- confirm.
 */
uint32_t CharSelectDataGetDetailIndex(CharSelectData* charselect, uint32_t unicode)
{
    static int cache_valid;
    static uint32_t most_recent_searched;
    static uint32_t most_recent_result;

    if (cache_valid && unicode == most_recent_searched)
        return most_recent_result;

    const char* data = charselect->dataFile;
    // Convert from little-endian, so that this code works on PPC too.
    // http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=482286
    const uint32_t offsetBegin = FromLittleEndian32(data+12);
    const uint32_t offsetEnd = FromLittleEndian32(data+16);

    int min = 0;
    int max = ((offsetEnd - offsetBegin) / 29) - 1;
    uint32_t found = 0;

    while (max >= min) {
        const int mid = min + (max - min) / 2;
        const uint32_t midUnicode = FromLittleEndian32(data + offsetBegin + mid*29);

        if (unicode > midUnicode) {
            min = mid + 1;
        } else if (unicode < midUnicode) {
            max = mid - 1;
        } else {
            found = offsetBegin + mid*29;
            break;
        }
    }

    most_recent_searched = unicode;
    most_recent_result = found;
    cache_valid = 1;
    return found;
}
217 
/*
 * Returns the name of `unicode` as a newly allocated string that the caller
 * frees.
 *
 *  - CJK unified ideographs in the three listed ranges get the algorithmic
 *    name "CJK UNIFIED IDEOGRAPH-" followed by the code point in uppercase
 *    hexadecimal, which is how the Unicode Standard spells these names.
 *  - Hangul syllables are composed from the jamo tables (UAX #15); a code
 *    point in 0xAC00..0xD7AF beyond the last syllable yields "".
 *  - Surrogate and private-use ranges get a translated placeholder.
 *  - Everything else is looked up in the name table of the data file
 *    (8-byte records: 32-bit code point, 32-bit string offset; the first
 *    byte at the string offset is skipped).
 *
 * When no name is found, or an allocation failed, a translated
 * "<not assigned>" is returned.
 */
char* CharSelectDataName(CharSelectData* charselect, uint32_t unicode)
{
    char* result = NULL;

    if ((unicode >= 0x3400 && unicode <= 0x4DB5)
            || (unicode >= 0x4e00 && unicode <= 0x9fa5)
            || (unicode >= 0x20000 && unicode <= 0x2A6D6)) {
        /* asprintf leaves the pointer undefined on failure: reset it. */
        if (asprintf(&result, "CJK UNIFIED IDEOGRAPH-%X", (unsigned int) unicode) < 0)
            result = NULL;
    } else if (unicode >= 0xac00 && unicode <= 0xd7af) {
        /* compute hangul syllable name as per UAX #15 */
        const int SIndex = unicode - SBase;

        if (SIndex < 0 || SIndex >= SCount) {
            result = strdup("");
        } else {
            const int LIndex = SIndex / NCount;
            const int VIndex = (SIndex % NCount) / TCount;
            const int TIndex = SIndex % TCount;

            fcitx_utils_alloc_cat_str(result, "HANGUL SYLLABLE ",
                                      JAMO_L_TABLE[LIndex],
                                      JAMO_V_TABLE[VIndex],
                                      JAMO_T_TABLE[TIndex]);
        }
    } else if (unicode >= 0xD800 && unicode <= 0xDB7F) {
        result = strdup(_("<Non Private Use High Surrogate>"));
    } else if (unicode >= 0xDB80 && unicode <= 0xDBFF) {
        result = strdup(_("<Private Use High Surrogate>"));
    } else if (unicode >= 0xDC00 && unicode <= 0xDFFF) {
        result = strdup(_("<Low Surrogate>"));
    } else if (unicode >= 0xE000 && unicode <= 0xF8FF) {
        result = strdup(_("<Private Use>"));
    } else {
        const char* data = charselect->dataFile;
        const uint32_t offsetBegin = FromLittleEndian32(data+4);
        const uint32_t offsetEnd = FromLittleEndian32(data+8);

        int min = 0;
        int max = ((offsetEnd - offsetBegin) / 8) - 1;

        while (max >= min) {
            const int mid = min + (max - min) / 2;
            const char* record = data + offsetBegin + mid*8;
            const uint32_t midUnicode = FromLittleEndian32(record);

            if (unicode > midUnicode) {
                min = mid + 1;
            } else if (unicode < midUnicode) {
                max = mid - 1;
            } else {
                const uint32_t offset = FromLittleEndian32(record + 4);
                result = strdup(charselect->dataFile + offset + 1);
                break;
            }
        }
    }

    if (!result) {
        result = strdup(_("<not assigned>"));
    }
    return result;
}
283 
/*
 * Returns a newly allocated copy of src in which every run of whitespace
 * characters is replaced by a single space. Leading and trailing runs are
 * collapsed as well, not removed. The caller frees the result; NULL is
 * returned when the allocation fails.
 *
 * The result is built in a fresh buffer and explicitly terminated, so
 * nothing of the original text is left over behind the compacted string
 * ("a  b" gives "a b"). Characters are converted to unsigned char before
 * being handed to isspace(), which is undefined for negative values such
 * as the bytes of UTF-8 text on platforms where char is signed.
 */
char* Simplified(const char* src)
{
    char* out = malloc(strlen(src) + 1);
    if (!out)
        return NULL;

    char* w = out;
    int lastIsSpace = 0;
    const char* r;

    for (r = src; *r; r++) {
        if (isspace((unsigned char) *r)) {
            if (!lastIsSpace)
                *w++ = ' ';
            lastIsSpace = 1;
        } else {
            *w++ = *r;
            lastIsSpace = 0;
        }
    }
    *w = '\0';

    return out;
}
309 
/*
 * Tells whether s is written as a hexadecimal code point: one of the
 * prefixes "0x", "0X", "u+" or "U+" followed only by hexadecimal digits,
 * with a total length of at least six characters (so at least four digits).
 *
 * Returns 1 for such a string, 0 otherwise.
 *
 * Characters are converted to unsigned char before being handed to
 * isxdigit(), which is undefined for negative values such as the bytes of
 * UTF-8 text on platforms where char is signed.
 */
int IsHexString(const char* s)
{
    if (strlen(s) < 6)
        return 0;

    const int hasPrefix = (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
                          || ((s[0] == 'u' || s[0] == 'U') && s[1] == '+');
    if (!hasPrefix)
        return 0;

    const char* p;
    for (p = s + 2; *p; p++) {
        if (!isxdigit((unsigned char) *p))
            return 0;
    }
    return 1;
}
330 
/*
 * Releases every node of a UnicodeSet. Nodes are unlinked from the head one
 * at a time until the set is empty; a NULL set is accepted and nothing is
 * done for it.
 */
void UnicodeSetFree(UnicodeSet* set) {
    UnicodeSet* node;

    while ((node = set) != NULL) {
        HASH_DEL(set, node);
        free(node);
    }
}
338 
/*
 * Computes the intersection of two sets, consuming both arguments.
 *
 * Nodes of `left` that have no counterpart in `right` are removed and
 * freed; the surviving nodes of `left` form the result. Matched nodes of
 * `right` are freed as soon as they are found and the remainder of `right`
 * is freed at the end.
 *
 * Returns the (possibly NULL, i.e. empty) remaining `left` set. When either
 * argument is empty, both are freed and NULL is returned.
 */
UnicodeSet* UnicodeSetIntersect(UnicodeSet* left, UnicodeSet* right)
{
    if (!left || !right) {
        if (left)
            UnicodeSetFree(left);
        if (right)
            UnicodeSetFree(right);
        return NULL;
    }

    UnicodeSet* cur = left;
    while (cur) {
        /* Remember the successor first: cur may be freed below. */
        UnicodeSet* following = cur->hh.next;
        UnicodeSet* match = NULL;

        HASH_FIND_UNICODE(right, &cur->unicode, match);
        if (match) {
            HASH_DEL(right, match);
            free(match);
        } else {
            HASH_DEL(left, cur);
            free(cur);
        }

        cur = following;
    }

    UnicodeSetFree(right);
    return left;
}
377 
CharSelectDataFind(CharSelectData * charselect,const char * needle)378 UT_array* CharSelectDataFind(CharSelectData* charselect, const char* needle)
379 {
380     UnicodeSet *result = NULL;
381 
382     UT_array* returnRes;
383     utarray_new(returnRes, fcitx_int32_icd);
384     char* simplified = Simplified(needle);
385     UT_array* searchStrings = SplitString(simplified);
386 
387     if (strlen(simplified) == 1) {
388         // search for hex representation of the character
389         utarray_clear(searchStrings);
390         char* format = FormatCode(simplified[0], 4, "U+");
391         utarray_push_back(searchStrings, &format);
392         free(format);
393     }
394     free(simplified);
395 
396     if (utarray_len(searchStrings) == 0) {
397         return returnRes;
398     }
399 
400     utarray_foreach(s, searchStrings, char*) {
401         char* end = NULL;
402         if(IsHexString(*s)) {
403             end = NULL;
404             uint32_t uni = (uint32_t) strtoul(*s + 2, &end, 16);
405             utarray_push_back(returnRes, &uni);
406 
407             // search for "1234" instead of "0x1234"
408             char* news = strdup(*s + 2);
409             free(*s);
410             *s = news;
411         }
412         // try to parse string as decimal number
413         end = NULL;
414         uint32_t unicode = (uint32_t) strtoul(*s, &end, 10);
415         if (*end == '\0') {
416             utarray_push_back(returnRes, &unicode);
417         }
418     }
419 
420     int firstSubString = 1;
421     utarray_foreach(s2, searchStrings, char* ) {
422         UnicodeSet* partResult = CharSelectDataGetMatchingChars(charselect, *s2);
423         if (firstSubString) {
424             result = partResult;
425             firstSubString = 0;
426         } else {
427             result = UnicodeSetIntersect(result, partResult);
428         }
429         if (!result)
430             break;
431     }
432 
433     // remove results found by matching the code point to prevent duplicate results
434     // while letting these characters stay at the beginning
435     utarray_foreach(c, returnRes, uint32_t) {
436         UnicodeSet* dup = NULL;
437         HASH_FIND_UNICODE(result, c, dup);
438         if (dup)
439             HASH_DEL(result, dup);
440     }
441 
442     HASH_SORT(result, uni_cmp);
443 
444     while (result) {
445         UnicodeSet* p = result;
446         HASH_DEL(result, p);
447         uint32_t unicode = p->unicode;
448         utarray_push_back(returnRes, &unicode);
449         free(p);
450     }
451 
452     utarray_free(searchStrings);
453 
454     return returnRes;
455 }
456 
/*
 * Adds `unicode` to `set` unless it is already a member.
 * Returns the head of the set, which changes when the first node is added
 * to an empty (NULL) set.
 */
UnicodeSet* InsertResult(UnicodeSet* set, uint32_t unicode) {
    UnicodeSet* node = NULL;

    HASH_FIND_UNICODE(set, &unicode, node);
    if (node)
        return set;

    node = fcitx_utils_new(UnicodeSet);
    node->unicode = unicode;
    HASH_ADD_UNICODE(set, unicode, node);
    return set;
}
467 
CharSelectDataGetMatchingChars(CharSelectData * charselect,const char * s)468 UnicodeSet* CharSelectDataGetMatchingChars(CharSelectData* charselect, const char* s)
469 {
470     UnicodeSet *result = NULL;
471     size_t s_l = strlen(s);
472     CharSelectDataIndex **pos;
473     CharSelectDataIndex **last;
474     pos = utarray_custom_bsearch(s, charselect->indexList, 0, index_search_cmp);
475     last = utarray_custom_bsearch(s, charselect->indexList,
476                                   0, index_search_a_cmp);
477     if (!pos)
478         return NULL;
479     if (!last)
480         last = (CharSelectDataIndex**)utarray_back(charselect->indexList);
481     while (pos != last && strncasecmp(s, (*pos)->key, s_l) == 0) {
482         utarray_foreach (c, (*pos)->items, uint32_t) {
483             result = InsertResult(result, *c);
484         }
485         ++pos;
486     }
487 
488     return result;
489 }
490 
CharSelectDataAliases(CharSelectData * charselect,uint32_t unicode)491 UT_array* CharSelectDataAliases(CharSelectData* charselect, uint32_t unicode)
492 {
493     const char* data = charselect->dataFile;
494     const int detailIndex = CharSelectDataGetDetailIndex(charselect, unicode);
495     if(detailIndex == 0) {
496         return fcitx_utils_new_string_list();
497     }
498 
499     const uint8_t count = * (uint8_t *)(data + detailIndex + 8);
500     uint32_t offset = FromLittleEndian32(data + detailIndex + 4);
501 
502     UT_array* aliases = fcitx_utils_new_string_list();
503 
504     int i;
505     for (i = 0;  i < count;  i++) {
506         const char* r = data + offset;
507         utarray_push_back(aliases, &r);
508         offset += strlen(data + offset) + 1;
509     }
510     return aliases;
511 }
512 
513 
CharSelectDataNotes(CharSelectData * charselect,uint32_t unicode)514 UT_array* CharSelectDataNotes(CharSelectData* charselect, uint32_t unicode)
515 {
516     const int detailIndex = CharSelectDataGetDetailIndex(charselect, unicode);
517     if(detailIndex == 0) {
518         return fcitx_utils_new_string_list();
519     }
520 
521     const char* data = charselect->dataFile;
522     const uint8_t count = * (uint8_t *)(data + detailIndex + 13);
523     uint32_t offset = FromLittleEndian32(data + detailIndex + 9);
524 
525     UT_array* notes = fcitx_utils_new_string_list();
526 
527     int i;
528     for (i = 0;  i < count;  i++) {
529         const char* r = data + offset;
530         utarray_push_back(notes, &r);
531         offset += strlen(data + offset) + 1;
532     }
533 
534     return notes;
535 }
536 
537 UT_array*
CharSelectDataSeeAlso(CharSelectData * charselect,uint32_t unicode)538 CharSelectDataSeeAlso(CharSelectData* charselect, uint32_t unicode)
539 {
540     UT_array *seeAlso;
541     utarray_new(seeAlso, fcitx_int32_icd);
542     const int detailIndex = CharSelectDataGetDetailIndex(charselect, unicode);
543     if(detailIndex == 0) {
544         return seeAlso;
545     }
546 
547     const char* data = charselect->dataFile;
548     const uint8_t count = * (uint8_t *)(data + detailIndex + 28);
549     uint32_t offset = FromLittleEndian32(data + detailIndex + 24);
550 
551     int i;
552     for (i = 0;  i < count;  i++) {
553         uint32_t c = FromLittleEndian16 (data + offset);
554         utarray_push_back(seeAlso, &c);
555         offset += 2;
556     }
557 
558     return seeAlso;
559 }
560 
CharSelectDataEquivalents(CharSelectData * charselect,uint32_t unicode)561 UT_array* CharSelectDataEquivalents(CharSelectData* charselect, uint32_t unicode)
562 {
563     const int detailIndex = CharSelectDataGetDetailIndex(charselect, unicode);
564     if(detailIndex == 0) {
565         return fcitx_utils_new_string_list();
566     }
567 
568     const char* data = charselect->dataFile;
569     const uint8_t count = * (uint8_t *)(data + detailIndex + 23);
570     uint32_t offset = FromLittleEndian32(data + detailIndex + 19);
571 
572     UT_array* equivalents = fcitx_utils_new_string_list();
573 
574     int i;
575     for (i = 0;  i < count;  i++) {
576         const char* r = data + offset;
577         utarray_push_back(equivalents, &r);
578         offset += strlen(data + offset) + 1;
579     }
580 
581     return equivalents;
582 }
583 
CharSelectDataApproximateEquivalents(CharSelectData * charselect,uint32_t unicode)584 UT_array* CharSelectDataApproximateEquivalents(CharSelectData* charselect, uint32_t unicode)
585 {
586     const int detailIndex = CharSelectDataGetDetailIndex(charselect, unicode);
587     if(detailIndex == 0) {
588         return fcitx_utils_new_string_list();
589     }
590 
591     const char* data = charselect->dataFile;
592     const uint8_t count = * (uint8_t *)(data + detailIndex + 18);
593     uint32_t offset = FromLittleEndian32(data + detailIndex + 14);
594 
595     UT_array* approxEquivalents = fcitx_utils_new_string_list();
596 
597     int i;
598     for (i = 0;  i < count;  i++) {
599         const char* r = data + offset;
600         utarray_push_back(approxEquivalents, &r);
601         offset += strlen(data + offset) + 1;
602     }
603 
604     return approxEquivalents;
605 }
606 
607 
/*
 * Formats a code point as `prefix` followed by `code` in uppercase
 * hexadecimal, zero-padded to at least `length` digits, e.g.
 * FormatCode(0x41, 4, "U+") gives "U+0041".
 *
 * Returns a newly allocated string that the caller frees, or NULL when
 * formatting or allocation fails.
 *
 * The field width is passed through '*' in a single format, so no
 * intermediate format string has to be built and every step can be checked.
 */
char* FormatCode(uint32_t code, int length, const char* prefix)
{
    const unsigned int value = code;

    /* First pass only measures the output. */
    const int needed = snprintf(NULL, 0, "%s%0*X", prefix, length, value);
    if (needed < 0)
        return NULL;

    char* s = malloc((size_t) needed + 1);
    if (!s)
        return NULL;

    snprintf(s, (size_t) needed + 1, "%s%0*X", prefix, length, value);
    return s;
}
617 
SplitString(const char * s)618 UT_array* SplitString(const char* s)
619 {
620     UT_array* result = fcitx_utils_new_string_list();
621     int start = 0;
622     int end = 0;
623     int length = strlen(s);
624     while (end < length) {
625         while (end < length && (isdigit(s[end]) || isalpha(s[end]) || s[end] == '+')) {
626             end++;
627         }
628         if (start != end) {
629             char* p = strndup(&s[start], end - start);
630             utarray_push_back(result, &p);
631             free(p);
632         }
633         start = end;
634         while (end < length && !(isdigit(s[end]) || isalpha(s[end]) || s[end] == '+')) {
635             end++;
636             start++;
637         }
638     }
639     return result;
640 }
641 
CharSelectDataIndexNew(const char * key)642 CharSelectDataIndex* CharSelectDataIndexNew(const char* key)
643 {
644     CharSelectDataIndex* idx = fcitx_utils_new(CharSelectDataIndex);
645     idx->key = strdup(key);
646     utarray_new(idx->items, fcitx_int32_icd);
647     return idx;
648 }
649 
/*
 * Registers `unicode` under every token of `str`.
 *
 * The text is tokenised with SplitString; for each token the matching entry
 * of the charselect->index hash is looked up (created and inserted when it
 * does not exist yet) and the code point is appended to the entry's items.
 */
void CharSelectDataAppendToIndex(CharSelectData* charselect, uint32_t unicode, const char* str)
{
    UT_array* tokens = SplitString(str);

    utarray_foreach(token, tokens, char*) {
        CharSelectDataIndex* entry = NULL;

        HASH_FIND_STR(charselect->index, *token, entry);
        if (entry == NULL) {
            entry = CharSelectDataIndexNew(*token);
            /* The hash refers to the entry's own copy of the key. */
            HASH_ADD_KEYPTR(hh, charselect->index, entry->key, strlen(entry->key), entry);
        }
        utarray_push_back(entry->items, &unicode);
    }

    utarray_free(tokens);
}
664 
/*
 * Debug aid: writes the key of every entry of charselect->indexList to
 * stderr, one key per line, in list order.
 */
void CharSelectDataDump(CharSelectData* charselect)
{
    utarray_foreach(entry, charselect->indexList, CharSelectDataIndex*) {
        const CharSelectDataIndex* item = *entry;
        fprintf(stderr, "%s\n", item->key);
    }
}
678 
/*
 * Indexes `count` consecutive NUL-terminated strings that start at
 * data + offset, registering `unicode` under the tokens of each of them.
 */
static void CharSelectDataIndexStringRun(CharSelectData* charselect,
                                         uint32_t unicode,
                                         const char* data,
                                         uint32_t offset,
                                         uint8_t count)
{
    int j;
    for (j = 0; j < count; j++) {
        CharSelectDataAppendToIndex(charselect, unicode, data + offset);
        offset += strlen(data + offset) + 1;
    }
}

/*
 * Builds the search index of a loaded data file.
 *
 * Three tables of the file are walked and every piece of text found is
 * tokenised and registered under its code point:
 *   - names: 8-byte records between the header offsets at +4 and +8;
 *   - details: 29-byte records between the header offsets at +12 and +16,
 *     holding aliases, notes, approximate equivalents, equivalents and
 *     "see also" cross references (the latter indexed by their hex code);
 *   - unihan: 32-byte records from the header offset at +36 to the end of
 *     the file, each with seven optional strings.
 *
 * Finally every entry of the charselect->index hash is collected into
 * charselect->indexList, sorted case-insensitively by key for the prefix
 * search.
 */
void CharSelectDataCreateIndex(CharSelectData* charselect)
{
    const char* data = charselect->dataFile;
    int pos, j, max;

    // character names
    const uint32_t nameOffsetBegin = FromLittleEndian32(data+4);
    const uint32_t nameOffsetEnd = FromLittleEndian32(data+8);

    max = ((nameOffsetEnd - nameOffsetBegin) / 8) - 1;
    for (pos = 0; pos <= max; pos++) {
        const char* record = data + nameOffsetBegin + pos*8;
        const uint32_t unicode = FromLittleEndian32(record);
        const uint32_t offset = FromLittleEndian32(record + 4);
        /* The first byte at the name offset is not part of the name text. */
        CharSelectDataAppendToIndex(charselect, unicode, (data + offset + 1));
    }

    // details
    const uint32_t detailsOffsetBegin = FromLittleEndian32(data+12);
    const uint32_t detailsOffsetEnd = FromLittleEndian32(data+16);

    max = ((detailsOffsetEnd - detailsOffsetBegin) / 29) - 1;
    for (pos = 0; pos <= max; pos++) {
        const char* record = data + detailsOffsetBegin + pos*29;
        const uint32_t unicode = FromLittleEndian32(record);

        // aliases
        CharSelectDataIndexStringRun(charselect, unicode, data,
                                     FromLittleEndian32(record + 4),
                                     * (uint8_t *)(record + 8));

        // notes
        CharSelectDataIndexStringRun(charselect, unicode, data,
                                     FromLittleEndian32(record + 9),
                                     * (uint8_t *)(record + 13));

        // approximate equivalents
        CharSelectDataIndexStringRun(charselect, unicode, data,
                                     FromLittleEndian32(record + 14),
                                     * (uint8_t *)(record + 18));

        // equivalents
        CharSelectDataIndexStringRun(charselect, unicode, data,
                                     FromLittleEndian32(record + 19),
                                     * (uint8_t *)(record + 23));

        // see also - convert to string (hex)
        const uint8_t seeAlsoCount = * (uint8_t *)(record + 28);
        uint32_t seeAlsoOffset = FromLittleEndian32(record + 24);

        for (j = 0;  j < seeAlsoCount;  j++) {
            uint32_t seeAlso = FromLittleEndian16 (data + seeAlsoOffset);
            char* code = FormatCode(seeAlso, 4, "");
            if (code) {
                CharSelectDataAppendToIndex(charselect, unicode, code);
                free(code);
            }
            /*
             * Step to the next 16-bit reference, as CharSelectDataSeeAlso
             * does; without this the first reference was indexed
             * seeAlsoCount times and the others never.
             */
            seeAlsoOffset += 2;
        }
    }

    // unihan data
    const uint32_t unihanOffsetBegin = FromLittleEndian32(data+36);
    const uint32_t unihanOffsetEnd = charselect->size;
    max = ((unihanOffsetEnd - unihanOffsetBegin) / 32) - 1;

    for (pos = 0; pos <= max; pos++) {
        const char* record = data + unihanOffsetBegin + pos*32;
        const uint32_t unicode = FromLittleEndian32(record);
        for (j = 0; j < 7; j++) {
            const uint32_t offset = FromLittleEndian32(record + 4 + j*4);
            /* Offset 0 marks a value that is not present. */
            if (offset != 0) {
                CharSelectDataAppendToIndex(charselect, unicode, (data + offset));
            }
        }
    }

    /* Flatten the hash into an array and sort it for the prefix search. */
    utarray_new(charselect->indexList, fcitx_ptr_icd);

    CharSelectDataIndex* idx = charselect->index;
    while (idx) {
        utarray_push_back(charselect->indexList, &idx);
        idx = idx->hh.next;
    }

    utarray_sort(charselect->indexList, pindex_cmp);
}
780 
/*
 * Destroys a CharSelectData: the sorted index list, every index entry (its
 * key, its items array and the entry itself), the in-memory copy of the
 * data file and finally the structure.
 */
void CharSelectDataFree(CharSelectData* charselect)
{
    CharSelectDataIndex* entry;

    /* The list only holds pointers to the entries freed below. */
    utarray_free(charselect->indexList);

    while ((entry = charselect->index) != NULL) {
        HASH_DEL(charselect->index, entry);
        free(entry->key);
        utarray_free(entry->items);
        free(entry);
    }

    free(charselect->dataFile);
    free(charselect);
}
794