1 /*
2 * this file is ported from kdelibs/kdeui/kcharselectdata.cpp
3 *
4 * original file is licensed under GPLv2+
5 */
6
7 #include <stdint.h>
8 #include <ctype.h>
9 #include <libintl.h>
10 #include <fcitx-utils/uthash.h>
11 #include <fcitx-utils/utils.h>
12 #include <fcitx-config/xdg.h>
13 #include <fcitx/fcitx.h>
14 #if defined(__linux__) || defined(__GLIBC__)
15 #include <endian.h>
16 #else
17 #include <sys/endian.h>
18 #endif
19 #include "charselectdata.h"
20
/* Constants for Hangul (de)composition, see UAX #15. */
enum {
    SBase  = 0xAC00,
    LBase  = 0x1100,
    VBase  = 0x1161,
    TBase  = 0x11A7,
    LCount = 19,
    VCount = 21,
    TCount = 28,
    NCount = VCount * TCount,
    SCount = LCount * NCount
};

/* uthash lookup / insertion helpers for tables keyed by a uint32_t field. */
#define HASH_FIND_UNICODE(head, findint, out) \
    HASH_FIND(hh, head, findint, sizeof(uint32_t), out)
#define HASH_ADD_UNICODE(head, intfield, add) \
    HASH_ADD(hh, head, intfield, sizeof(uint32_t), add)
35
/* One node of a uthash table used as a set of code points. */
typedef struct _UnicodeSet {
    uint32_t unicode;   /* hash key (see HASH_FIND_UNICODE / HASH_ADD_UNICODE) */
    UT_hash_handle hh;  /* uthash bookkeeping handle */
} UnicodeSet;
40
/*
 * First name component of a Hangul syllable; CharSelectDataName indexes
 * this table with LIndex (SIndex / NCount).
 */
static const char JAMO_L_TABLE[][4] = {
    "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
    "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
};
45
/*
 * Second name component of a Hangul syllable; CharSelectDataName indexes
 * this table with VIndex ((SIndex % NCount) / TCount).
 */
static const char JAMO_V_TABLE[][4] = {
    "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
    "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
    "YU", "EU", "YI", "I"
};
51
/*
 * Third name component of a Hangul syllable; CharSelectDataName indexes
 * this table with TIndex (SIndex % TCount). Entry 0 is the empty string.
 */
static const char JAMO_T_TABLE[][4] = {
    "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
    "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
    "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
};
57
uni_cmp(const void * a,const void * b)58 int uni_cmp(const void* a, const void* b) {
59 const UnicodeSet* sa = a;
60 const UnicodeSet* sb = b;
61 return sa->unicode - sb->unicode;
62 }
63
pindex_cmp(const void * a,const void * b)64 int pindex_cmp(const void* a, const void* b) {
65 CharSelectDataIndex* const* pa = a;
66 CharSelectDataIndex* const* pb = b;
67
68 return strcasecmp((*pa)->key, (*pb)->key);
69 }
70
index_search_cmp(const void * a,const void * b)71 int index_search_cmp(const void* a, const void* b) {
72 const char* s = a;
73 CharSelectDataIndex* const* pb = b;
74
75 return strcasecmp(s, (*pb)->key);
76 }
77
index_search_a_cmp(const void * a,const void * b)78 int index_search_a_cmp(const void* a, const void* b) {
79 const char* s = a;
80 CharSelectDataIndex* const* pb = b;
81
82 int res, len;
83 len = strlen(s);
84 res = strncasecmp(s, (*pb)->key, len);
85 if (res)
86 return res;
87 else
88 return 1;
89 }
90
91 UT_array* SplitString(const char* s);
92
93 char* FormatCode(uint32_t code, int length, const char* prefix);
94 UnicodeSet* CharSelectDataGetMatchingChars(CharSelectData* charselect, const char* s);
95
/*
 * Decode the 32-bit little-endian unsigned integer stored in the four bytes
 * starting at `d`. The pointer does not need to be aligned.
 */
uint32_t FromLittleEndian32(const char* d)
{
    const uint8_t* bytes = (const uint8_t*) d;

    /* Assemble least-significant byte first; host byte order is irrelevant. */
    return (uint32_t) bytes[0]
         | ((uint32_t) bytes[1] << 8)
         | ((uint32_t) bytes[2] << 16)
         | ((uint32_t) bytes[3] << 24);
}
103
/*
 * Decode the 16-bit little-endian unsigned integer stored in the two bytes
 * starting at `d`. The pointer does not need to be aligned.
 */
uint16_t FromLittleEndian16(const char* d)
{
    const uint8_t* bytes = (const uint8_t*) d;

    /* Low byte comes first in the buffer. */
    return (uint16_t) ((unsigned int) bytes[0] | ((unsigned int) bytes[1] << 8));
}
111
CharSelectDataCreate()112 CharSelectData* CharSelectDataCreate()
113 {
114 CharSelectData* charselect = fcitx_utils_new(CharSelectData);
115
116 do {
117
118 FILE* fp = FcitxXDGGetFileWithPrefix("data", "charselectdata", "r", NULL);
119 if (!fp)
120 break;
121
122 fseek(fp, 0, SEEK_END);
123 long int size = ftell(fp);
124 fseek(fp, 0, SEEK_SET);
125
126 charselect->size = size;
127 charselect->dataFile = fcitx_utils_malloc0(size);
128 fread(charselect->dataFile, 1, size, fp);
129
130 fclose(fp);
131
132 CharSelectDataCreateIndex(charselect);
133
134 return charselect;
135 } while(0);
136
137 free(charselect);
138 return NULL;
139 }
140
CharSelectDataUnihanInfo(CharSelectData * charselect,uint32_t unicode)141 UT_array* CharSelectDataUnihanInfo(CharSelectData* charselect, uint32_t unicode)
142 {
143 UT_array* res = fcitx_utils_new_string_list();
144
145 const char* data = charselect->dataFile;
146 const uint32_t offsetBegin = FromLittleEndian32(data+36);
147 const uint32_t offsetEnd = charselect->size;
148
149 int min = 0;
150 int mid;
151 int max = ((offsetEnd - offsetBegin) / 32) - 1;
152
153 while (max >= min) {
154 mid = (min + max) / 2;
155 const uint32_t midUnicode = FromLittleEndian16(data + offsetBegin + mid*32);
156 if (unicode > midUnicode)
157 min = mid + 1;
158 else if (unicode < midUnicode)
159 max = mid - 1;
160 else {
161 int i;
162 for(i = 0; i < 7; i++) {
163 uint32_t offset = FromLittleEndian32(data + offsetBegin + mid*32 + 4 + i*4);
164 const char* empty = "";
165 if(offset != 0) {
166 const char* r = data + offset;
167 utarray_push_back(res, &r);
168 } else {
169 utarray_push_back(res, &empty);
170 }
171 }
172 return res;
173 }
174 }
175
176 return res;
177 }
178
/*
 * Find the details record of `unicode`.
 *
 * The details table spans the byte range given by the 32-bit offsets stored
 * at bytes 12 and 16 of the data file; each record is 29 bytes and starts
 * with the 32-bit code point it describes.
 *
 * Returns the byte offset of the matching record inside the data file, or 0
 * when there is none.
 *
 * The last query and its answer are remembered in function-local statics, so
 * the cache is shared by every caller and is not keyed on `charselect`.
 */
uint32_t CharSelectDataGetDetailIndex(CharSelectData* charselect, uint32_t unicode)
{
    static uint32_t most_recent_searched;
    static uint32_t most_recent_result;

    if (unicode == most_recent_searched)
        return most_recent_result;

    most_recent_searched = unicode;

    const char* data = charselect->dataFile;
    // Convert from little-endian, so that this code works on PPC too.
    // http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=482286
    const uint32_t offsetBegin = FromLittleEndian32(data+12);
    const uint32_t offsetEnd = FromLittleEndian32(data+16);

    int min = 0;
    /* Guard against an inverted range wrapping in unsigned arithmetic. */
    int max = (offsetEnd >= offsetBegin)
              ? (int) ((offsetEnd - offsetBegin) / 29) - 1
              : -1;

    while (max >= min) {
        const int mid = min + (max - min) / 2;
        const uint32_t recordOffset = offsetBegin + (uint32_t) mid * 29;
        /*
         * Read the full 32-bit key, as CharSelectDataCreateIndex does for the
         * same field; a 16-bit read truncates code points above U+FFFF.
         */
        const uint32_t midUnicode = FromLittleEndian32(data + recordOffset);

        if (unicode > midUnicode) {
            min = mid + 1;
        } else if (unicode < midUnicode) {
            max = mid - 1;
        } else {
            most_recent_result = recordOffset;
            return most_recent_result;
        }
    }

    most_recent_result = 0;
    return 0;
}
217
/*
 * Produce the name of the code point `unicode`.
 *
 * CJK unified ideographs and Hangul syllables get algorithmic names,
 * surrogate and private-use ranges get fixed (translated) placeholders, and
 * everything else is looked up in the names table of the data file (8-byte
 * records: 32-bit code point, 32-bit offset of the name entry; the name text
 * starts one byte after that offset).
 *
 * Returns a heap-allocated string the caller must free(). When no name is
 * found the translated "<not assigned>" placeholder is returned.
 */
char* CharSelectDataName(CharSelectData* charselect, uint32_t unicode)
{
    char* result = NULL;
    do {
        if ((unicode >= 0x3400 && unicode <= 0x4DB5)
                || (unicode >= 0x4e00 && unicode <= 0x9fa5)
                || (unicode >= 0x20000 && unicode <= 0x2A6D6)) {
            /* asprintf leaves the pointer unspecified on failure; reset it. */
            if (asprintf(&result, "CJK UNIFIED IDEOGRAPH-%x", unicode) < 0)
                result = NULL;
        } else if (unicode >= 0xac00 && unicode <= 0xd7af) {
            /* compute hangul syllable name as per UAX #15 */
            int SIndex = unicode - SBase;
            int LIndex, VIndex, TIndex;

            if (SIndex < 0 || SIndex >= SCount) {
                result = strdup("");
                break;
            }

            LIndex = SIndex / NCount;
            VIndex = (SIndex % NCount) / TCount;
            TIndex = SIndex % TCount;

            fcitx_utils_alloc_cat_str(result, "HANGUL SYLLABLE ",
                                      JAMO_L_TABLE[LIndex],
                                      JAMO_V_TABLE[VIndex],
                                      JAMO_T_TABLE[TIndex]);
        } else if (unicode >= 0xD800 && unicode <= 0xDB7F) {
            result = strdup(_("<Non Private Use High Surrogate>"));
        } else if (unicode >= 0xDB80 && unicode <= 0xDBFF) {
            result = strdup(_("<Private Use High Surrogate>"));
        } else if (unicode >= 0xDC00 && unicode <= 0xDFFF) {
            result = strdup(_("<Low Surrogate>"));
        } else if (unicode >= 0xE000 && unicode <= 0xF8FF) {
            result = strdup(_("<Private Use>"));
        } else {
            const char* data = charselect->dataFile;
            const uint32_t offsetBegin = FromLittleEndian32(data+4);
            const uint32_t offsetEnd = FromLittleEndian32(data+8);

            int min = 0;
            /* Guard against an inverted range wrapping in unsigned arithmetic. */
            int max = (offsetEnd >= offsetBegin)
                      ? (int) ((offsetEnd - offsetBegin) / 8) - 1
                      : -1;

            while (max >= min) {
                const int mid = min + (max - min) / 2;
                const char* record = data + offsetBegin + (size_t) mid * 8;
                const uint32_t midUnicode = FromLittleEndian32(record);

                if (unicode > midUnicode) {
                    min = mid + 1;
                } else if (unicode < midUnicode) {
                    max = mid - 1;
                } else {
                    const uint32_t offset = FromLittleEndian32(record + 4);
                    /* Skip the one byte that precedes the name text. */
                    result = strdup(charselect->dataFile + offset + 1);
                    break;
                }
            }
        }
    } while(0);

    if (!result) {
        result = strdup(_("<not assigned>"));
    }
    return result;
}
283
/*
 * Return a copy of `src` in which every run of whitespace characters is
 * collapsed into a single ' '. Leading and trailing runs are collapsed too,
 * not removed.
 *
 * The result is heap-allocated and must be free()d by the caller; NULL is
 * returned when the allocation fails.
 */
char* Simplified(const char* src)
{
    char* out = malloc(strlen(src) + 1);
    char* w = out;
    const char* r;
    int lastIsSpace = 0;

    if (!out)
        return NULL;

    for (r = src; *r; r++) {
        /* <ctype.h> functions need a value representable as unsigned char. */
        if (isspace((unsigned char) *r)) {
            if (!lastIsSpace)
                *w++ = ' ';
            lastIsSpace = 1;
        } else {
            *w++ = *r;
            lastIsSpace = 0;
        }
    }

    /*
     * Terminate at the write position: the output can be shorter than the
     * input, and without this the tail of the input would stay visible.
     */
    *w = '\0';
    return out;
}
309
/*
 * Tell whether `s` looks like a prefixed hexadecimal code point: one of the
 * prefixes "0x", "0X", "u+" or "U+" followed only by hexadecimal digits, with
 * a total length of at least six characters (so at least four digits).
 *
 * Returns 1 on a match and 0 otherwise.
 */
int IsHexString(const char* s)
{
    const char* digit;
    int hasPrefix;

    if (strlen(s) < 6)
        return 0;

    hasPrefix = (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
             || ((s[0] == 'u' || s[0] == 'U') && s[1] == '+');
    if (!hasPrefix)
        return 0;

    for (digit = s + 2; *digit; digit++) {
        /* Cast first: passing a negative char to isxdigit is undefined. */
        if (!isxdigit((unsigned char) *digit))
            return 0;
    }
    return 1;
}
330
/*
 * Destroy a UnicodeSet: unlink and free nodes, always taking the current
 * head, until the table is empty. Passing NULL does nothing.
 */
void UnicodeSetFree(UnicodeSet* set) {
    UnicodeSet* head;

    /* HASH_DEL updates `set`, so the loop ends once the last node is gone. */
    for (head = set; head != NULL; head = set) {
        HASH_DEL(set, head);
        free(head);
    }
}
338
/*
 * Intersect two sets, consuming both arguments.
 *
 * Nodes of `left` with no counterpart in `right` are removed and freed; the
 * surviving `left` table is returned (NULL when nothing is left). Every node
 * of `right` is freed. If either input is NULL the other one is freed and
 * NULL is returned.
 */
UnicodeSet* UnicodeSetIntersect(UnicodeSet* left, UnicodeSet* right)
{
    UnicodeSet* node;
    UnicodeSet* following;

    if (!left || !right) {
        if (left)
            UnicodeSetFree(left);
        if (right)
            UnicodeSetFree(right);
        return NULL;
    }

    for (node = left; node; node = following) {
        UnicodeSet* match = NULL;
        HASH_FIND_UNICODE(right, &node->unicode, match);
        /* Remember the successor before `node` may be unlinked and freed. */
        following = node->hh.next;

        if (match) {
            /* Present in both: keep it in `left`, drop the copy in `right`. */
            HASH_DEL(right, match);
            free(match);
        } else {
            HASH_DEL(left, node);
            free(node);
        }
    }

    UnicodeSetFree(right);
    return left;
}
377
CharSelectDataFind(CharSelectData * charselect,const char * needle)378 UT_array* CharSelectDataFind(CharSelectData* charselect, const char* needle)
379 {
380 UnicodeSet *result = NULL;
381
382 UT_array* returnRes;
383 utarray_new(returnRes, fcitx_int32_icd);
384 char* simplified = Simplified(needle);
385 UT_array* searchStrings = SplitString(simplified);
386
387 if (strlen(simplified) == 1) {
388 // search for hex representation of the character
389 utarray_clear(searchStrings);
390 char* format = FormatCode(simplified[0], 4, "U+");
391 utarray_push_back(searchStrings, &format);
392 free(format);
393 }
394 free(simplified);
395
396 if (utarray_len(searchStrings) == 0) {
397 return returnRes;
398 }
399
400 utarray_foreach(s, searchStrings, char*) {
401 char* end = NULL;
402 if(IsHexString(*s)) {
403 end = NULL;
404 uint32_t uni = (uint32_t) strtoul(*s + 2, &end, 16);
405 utarray_push_back(returnRes, &uni);
406
407 // search for "1234" instead of "0x1234"
408 char* news = strdup(*s + 2);
409 free(*s);
410 *s = news;
411 }
412 // try to parse string as decimal number
413 end = NULL;
414 uint32_t unicode = (uint32_t) strtoul(*s, &end, 10);
415 if (*end == '\0') {
416 utarray_push_back(returnRes, &unicode);
417 }
418 }
419
420 int firstSubString = 1;
421 utarray_foreach(s2, searchStrings, char* ) {
422 UnicodeSet* partResult = CharSelectDataGetMatchingChars(charselect, *s2);
423 if (firstSubString) {
424 result = partResult;
425 firstSubString = 0;
426 } else {
427 result = UnicodeSetIntersect(result, partResult);
428 }
429 if (!result)
430 break;
431 }
432
433 // remove results found by matching the code point to prevent duplicate results
434 // while letting these characters stay at the beginning
435 utarray_foreach(c, returnRes, uint32_t) {
436 UnicodeSet* dup = NULL;
437 HASH_FIND_UNICODE(result, c, dup);
438 if (dup)
439 HASH_DEL(result, dup);
440 }
441
442 HASH_SORT(result, uni_cmp);
443
444 while (result) {
445 UnicodeSet* p = result;
446 HASH_DEL(result, p);
447 uint32_t unicode = p->unicode;
448 utarray_push_back(returnRes, &unicode);
449 free(p);
450 }
451
452 utarray_free(searchStrings);
453
454 return returnRes;
455 }
456
/*
 * Add `unicode` to `set` unless it is already a member.
 *
 * Returns the (possibly updated) table head; callers must use the returned
 * value because inserting into an empty set creates the head.
 */
UnicodeSet* InsertResult(UnicodeSet* set, uint32_t unicode) {
    UnicodeSet* node = NULL;

    HASH_FIND_UNICODE(set, &unicode, node);
    if (node)
        return set;

    node = fcitx_utils_new(UnicodeSet);
    node->unicode = unicode;
    HASH_ADD_UNICODE(set, unicode, node);
    return set;
}
467
CharSelectDataGetMatchingChars(CharSelectData * charselect,const char * s)468 UnicodeSet* CharSelectDataGetMatchingChars(CharSelectData* charselect, const char* s)
469 {
470 UnicodeSet *result = NULL;
471 size_t s_l = strlen(s);
472 CharSelectDataIndex **pos;
473 CharSelectDataIndex **last;
474 pos = utarray_custom_bsearch(s, charselect->indexList, 0, index_search_cmp);
475 last = utarray_custom_bsearch(s, charselect->indexList,
476 0, index_search_a_cmp);
477 if (!pos)
478 return NULL;
479 if (!last)
480 last = (CharSelectDataIndex**)utarray_back(charselect->indexList);
481 while (pos != last && strncasecmp(s, (*pos)->key, s_l) == 0) {
482 utarray_foreach (c, (*pos)->items, uint32_t) {
483 result = InsertResult(result, *c);
484 }
485 ++pos;
486 }
487
488 return result;
489 }
490
CharSelectDataAliases(CharSelectData * charselect,uint32_t unicode)491 UT_array* CharSelectDataAliases(CharSelectData* charselect, uint32_t unicode)
492 {
493 const char* data = charselect->dataFile;
494 const int detailIndex = CharSelectDataGetDetailIndex(charselect, unicode);
495 if(detailIndex == 0) {
496 return fcitx_utils_new_string_list();
497 }
498
499 const uint8_t count = * (uint8_t *)(data + detailIndex + 8);
500 uint32_t offset = FromLittleEndian32(data + detailIndex + 4);
501
502 UT_array* aliases = fcitx_utils_new_string_list();
503
504 int i;
505 for (i = 0; i < count; i++) {
506 const char* r = data + offset;
507 utarray_push_back(aliases, &r);
508 offset += strlen(data + offset) + 1;
509 }
510 return aliases;
511 }
512
513
CharSelectDataNotes(CharSelectData * charselect,uint32_t unicode)514 UT_array* CharSelectDataNotes(CharSelectData* charselect, uint32_t unicode)
515 {
516 const int detailIndex = CharSelectDataGetDetailIndex(charselect, unicode);
517 if(detailIndex == 0) {
518 return fcitx_utils_new_string_list();
519 }
520
521 const char* data = charselect->dataFile;
522 const uint8_t count = * (uint8_t *)(data + detailIndex + 13);
523 uint32_t offset = FromLittleEndian32(data + detailIndex + 9);
524
525 UT_array* notes = fcitx_utils_new_string_list();
526
527 int i;
528 for (i = 0; i < count; i++) {
529 const char* r = data + offset;
530 utarray_push_back(notes, &r);
531 offset += strlen(data + offset) + 1;
532 }
533
534 return notes;
535 }
536
537 UT_array*
CharSelectDataSeeAlso(CharSelectData * charselect,uint32_t unicode)538 CharSelectDataSeeAlso(CharSelectData* charselect, uint32_t unicode)
539 {
540 UT_array *seeAlso;
541 utarray_new(seeAlso, fcitx_int32_icd);
542 const int detailIndex = CharSelectDataGetDetailIndex(charselect, unicode);
543 if(detailIndex == 0) {
544 return seeAlso;
545 }
546
547 const char* data = charselect->dataFile;
548 const uint8_t count = * (uint8_t *)(data + detailIndex + 28);
549 uint32_t offset = FromLittleEndian32(data + detailIndex + 24);
550
551 int i;
552 for (i = 0; i < count; i++) {
553 uint32_t c = FromLittleEndian16 (data + offset);
554 utarray_push_back(seeAlso, &c);
555 offset += 2;
556 }
557
558 return seeAlso;
559 }
560
CharSelectDataEquivalents(CharSelectData * charselect,uint32_t unicode)561 UT_array* CharSelectDataEquivalents(CharSelectData* charselect, uint32_t unicode)
562 {
563 const int detailIndex = CharSelectDataGetDetailIndex(charselect, unicode);
564 if(detailIndex == 0) {
565 return fcitx_utils_new_string_list();
566 }
567
568 const char* data = charselect->dataFile;
569 const uint8_t count = * (uint8_t *)(data + detailIndex + 23);
570 uint32_t offset = FromLittleEndian32(data + detailIndex + 19);
571
572 UT_array* equivalents = fcitx_utils_new_string_list();
573
574 int i;
575 for (i = 0; i < count; i++) {
576 const char* r = data + offset;
577 utarray_push_back(equivalents, &r);
578 offset += strlen(data + offset) + 1;
579 }
580
581 return equivalents;
582 }
583
CharSelectDataApproximateEquivalents(CharSelectData * charselect,uint32_t unicode)584 UT_array* CharSelectDataApproximateEquivalents(CharSelectData* charselect, uint32_t unicode)
585 {
586 const int detailIndex = CharSelectDataGetDetailIndex(charselect, unicode);
587 if(detailIndex == 0) {
588 return fcitx_utils_new_string_list();
589 }
590
591 const char* data = charselect->dataFile;
592 const uint8_t count = * (uint8_t *)(data + detailIndex + 18);
593 uint32_t offset = FromLittleEndian32(data + detailIndex + 14);
594
595 UT_array* approxEquivalents = fcitx_utils_new_string_list();
596
597 int i;
598 for (i = 0; i < count; i++) {
599 const char* r = data + offset;
600 utarray_push_back(approxEquivalents, &r);
601 offset += strlen(data + offset) + 1;
602 }
603
604 return approxEquivalents;
605 }
606
607
/*
 * Format `code` as upper-case hexadecimal, zero-padded to at least `length`
 * digits and preceded by `prefix` (for example FormatCode(0x41, 4, "U+")
 * yields "U+0041").
 *
 * Returns a heap-allocated string the caller must free(), or NULL when
 * formatting or allocation fails.
 */
char* FormatCode(uint32_t code, int length, const char* prefix)
{
    /* %X takes an unsigned int; the width is supplied through '*' so no
     * format string has to be assembled at run time. */
    const unsigned int value = code;
    char* s;
    int needed = snprintf(NULL, 0, "%s%0*X", prefix, length, value);

    if (needed < 0)
        return NULL;

    s = malloc((size_t) needed + 1);
    if (!s)
        return NULL;

    snprintf(s, (size_t) needed + 1, "%s%0*X", prefix, length, value);
    return s;
}
617
SplitString(const char * s)618 UT_array* SplitString(const char* s)
619 {
620 UT_array* result = fcitx_utils_new_string_list();
621 int start = 0;
622 int end = 0;
623 int length = strlen(s);
624 while (end < length) {
625 while (end < length && (isdigit(s[end]) || isalpha(s[end]) || s[end] == '+')) {
626 end++;
627 }
628 if (start != end) {
629 char* p = strndup(&s[start], end - start);
630 utarray_push_back(result, &p);
631 free(p);
632 }
633 start = end;
634 while (end < length && !(isdigit(s[end]) || isalpha(s[end]) || s[end] == '+')) {
635 end++;
636 start++;
637 }
638 }
639 return result;
640 }
641
CharSelectDataIndexNew(const char * key)642 CharSelectDataIndex* CharSelectDataIndexNew(const char* key)
643 {
644 CharSelectDataIndex* idx = fcitx_utils_new(CharSelectDataIndex);
645 idx->key = strdup(key);
646 utarray_new(idx->items, fcitx_int32_icd);
647 return idx;
648 }
649
/*
 * Register `unicode` under every token of `str` (as produced by
 * SplitString): the index entry for a token is created on first use and the
 * code point is appended to its item array.
 */
void CharSelectDataAppendToIndex(CharSelectData* charselect, uint32_t unicode, const char* str)
{
    UT_array* words = SplitString(str);

    utarray_foreach(word, words, char*) {
        CharSelectDataIndex* entry = NULL;

        HASH_FIND_STR(charselect->index, *word, entry);
        if (entry == NULL) {
            entry = CharSelectDataIndexNew(*word);
            /* Key the table on the entry's own copy of the string. */
            HASH_ADD_KEYPTR(hh, charselect->index, entry->key, strlen(entry->key), entry);
        }
        utarray_push_back(entry->items, &unicode);
    }

    utarray_free(words);
}
664
/*
 * Debug helper: write every key of the index list to stderr, one per line,
 * in list order.
 */
void CharSelectDataDump(CharSelectData* charselect)
{
    utarray_foreach(entry, charselect->indexList, CharSelectDataIndex*) {
        const char* key = (*entry)->key;
        fprintf(stderr, "%s\n", key);
    }
}
678
/*
 * Index `count` consecutive NUL-terminated strings, the first of which
 * starts at data + offset, under the code point `unicode`.
 */
static void CharSelectDataIndexStringRun(CharSelectData* charselect,
                                         uint32_t unicode,
                                         const char* data,
                                         uint32_t offset,
                                         uint8_t count)
{
    int j;
    for (j = 0; j < count; j++) {
        CharSelectDataAppendToIndex(charselect, unicode, data + offset);
        offset += strlen(data + offset) + 1;
    }
}

/*
 * Build the search index of `charselect` from its loaded data file.
 *
 * Three tables are walked: character names (8-byte records, range stored at
 * bytes 4 and 8), details (29-byte records, range stored at bytes 12 and 16)
 * and Unihan data (32-byte records, from the offset stored at byte 36 to the
 * end of the file). Every string found is tokenised and added to the hash
 * index; finally all index entries are collected into `indexList` and sorted
 * case-insensitively by key.
 */
void CharSelectDataCreateIndex(CharSelectData* charselect)
{
    /* Byte position of each (32-bit offset, 8-bit count) string field inside
     * a details record: aliases, notes, approximate equivalents, equivalents. */
    static const int stringFields[] = { 4, 9, 14, 19 };

    // character names
    const char* data = charselect->dataFile;
    const uint32_t nameOffsetBegin = FromLittleEndian32(data+4);
    const uint32_t nameOffsetEnd = FromLittleEndian32(data+8);

    int max = ((nameOffsetEnd - nameOffsetBegin) / 8) - 1;

    int pos, j;

    for (pos = 0; pos <= max; pos++) {
        const char* record = data + nameOffsetBegin + pos*8;
        const uint32_t unicode = FromLittleEndian32(record);
        const uint32_t offset = FromLittleEndian32(record + 4);
        /* The name text starts one byte after the stored offset. */
        CharSelectDataAppendToIndex(charselect, unicode, (data + offset + 1));
    }

    // details
    const uint32_t detailsOffsetBegin = FromLittleEndian32(data+12);
    const uint32_t detailsOffsetEnd = FromLittleEndian32(data+16);

    max = ((detailsOffsetEnd - detailsOffsetBegin) / 29) - 1;
    for (pos = 0; pos <= max; pos++) {
        const char* record = data + detailsOffsetBegin + pos*29;
        const uint32_t unicode = FromLittleEndian32(record);
        size_t f;

        for (f = 0; f < sizeof(stringFields) / sizeof(stringFields[0]); f++) {
            const uint32_t offset = FromLittleEndian32(record + stringFields[f]);
            const uint8_t count = * (uint8_t *)(record + stringFields[f] + 4);
            CharSelectDataIndexStringRun(charselect, unicode, data, offset, count);
        }

        // see also - convert to string (hex)
        const uint8_t seeAlsoCount = * (uint8_t *)(record + 28);
        uint32_t seeAlsoOffset = FromLittleEndian32(record + 24);

        for (j = 0; j < seeAlsoCount; j++) {
            uint32_t seeAlso = FromLittleEndian16 (data + seeAlsoOffset);
            char* code = FormatCode(seeAlso, 4, "");
            CharSelectDataAppendToIndex(charselect, unicode, code);
            free(code);
            /* Each reference is a 16-bit value: move on to the next one. */
            seeAlsoOffset += 2;
        }
    }

    // unihan data
    const uint32_t unihanOffsetBegin = FromLittleEndian32(data+36);
    const uint32_t unihanOffsetEnd = charselect->size;
    max = ((unihanOffsetEnd - unihanOffsetBegin) / 32) - 1;

    for (pos = 0; pos <= max; pos++) {
        const char* record = data + unihanOffsetBegin + pos*32;
        const uint32_t unicode = FromLittleEndian32(record);
        for(j = 0; j < 7; j++) {
            const uint32_t offset = FromLittleEndian32(record + 4 + j*4);
            /* Offset 0 marks an absent field. */
            if(offset != 0) {
                CharSelectDataAppendToIndex(charselect, unicode, (data + offset));
            }
        }
    }

    /* Flatten the hash table into a sorted array for binary searching. */
    utarray_new(charselect->indexList, fcitx_ptr_icd);

    CharSelectDataIndex* idx = charselect->index;
    while(idx) {
        utarray_push_back(charselect->indexList, &idx);
        idx = idx->hh.next;
    }

    utarray_sort(charselect->indexList, pindex_cmp);
}
780
/*
 * Release everything owned by `charselect`: the sorted index list, every
 * index entry (key, item array and the entry itself), the loaded data file
 * and finally the structure.
 */
void CharSelectDataFree(CharSelectData* charselect)
{
    CharSelectDataIndex* entry;

    utarray_free(charselect->indexList);

    /* HASH_DEL updates charselect->index, which becomes NULL at the end. */
    while ((entry = charselect->index) != NULL) {
        HASH_DEL(charselect->index, entry);
        free(entry->key);
        utarray_free(entry->items);
        free(entry);
    }

    free(charselect->dataFile);
    free(charselect);
}
794