1 /*
2 Copyright (c) 2003-2013 uim Project https://github.com/uim/uim
3
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9
10 1. Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15 3. Neither the name of authors nor the names of its contributors
16 may be used to endorse or promote products derived from this software
17 without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 SUCH DAMAGE.
30
31 */
32
33 /*
34 * SKK is a simple Japanese input method
35 *
36 * Many many things are to be implemented!
37 */
38 #include <config.h>
39
40 #include <sys/types.h>
41 #include <sys/mman.h>
42 #include <sys/stat.h>
43 #include <fcntl.h>
44 #include <unistd.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <stdio.h>
48 #include <ctype.h>
49 #include <signal.h>
50 #include <errno.h>
51 #include <sys/socket.h>
52 #include <netdb.h>
53 #include <sys/param.h>
54 #ifdef HAVE_STRINGS_H
55 #include <strings.h>
56 #endif
57 #ifdef HAVE_POLL_H
58 #include <poll.h>
59 #elif defined(HAVE_SYS_POLL_H)
60 #include <sys/poll.h>
61 #else
62 #include "bsd-poll.h"
63 #endif
64
65 #include "uim.h"
66 #include "uim-scm.h"
67 #include "uim-scm-abbrev.h"
68 #include "uim-helper.h"
69 #include "dynlib.h"
70 #include "uim-notify.h"
71 #include "gettext.h"
72
73 #include "bsdlook.h"
74
/*
 * ASCII-only character class tests.
 *
 * The argument is converted to unsigned char before it is compared with
 * the literal range bounds, so bytes >= 0x80 stored in a (possibly signed)
 * char never match.  The argument is parenthesized inside the cast so that
 * an expression argument is converted as a whole.
 *
 * NOTE: the argument is evaluated more than once; do not pass expressions
 * with side effects.
 */
#define skk_isalpha(ch) (skk_islower(ch) || skk_isupper(ch))
#define skk_islower(ch) ((((unsigned char)(ch)) >= 'a') && (((unsigned char)(ch)) <= 'z'))
#define skk_isupper(ch) ((((unsigned char)(ch)) >= 'A') && (((unsigned char)(ch)) <= 'Z'))
#define skk_isascii(ch) ((((unsigned char)(ch)) & ~0x7f) == 0)
79
80 #define IGNORING_WORD_MAX 63
81 #define USE_SKK_JISYO_S_BUF 1 /* use SKK-JISYO.S as a cache for
82 word completion */
83 #define SKK_JISYO_S DATADIR "/skk/SKK-JISYO.S"
84
85 /*
86 * cand : candidate
87 */
88
89
/* candidate array for each okurigana
 *
 * |C0|C1| .. |Cnr_real_cands| .. |Cnr_cands|
 * <-------should be saved --><-- cache of master dict -->
 *
 * The leading nr_real_cands entries are the part that should be saved;
 * the remaining entries up to nr_cands are a cache of the master
 * dictionary.
 */
struct skk_cand_array {
  /* okurigana string (NULL for the okuri-nasi array of a line) */
  char *okuri;

  int nr_cands; /* number of entries stored in cands */
  int nr_real_cands; /* number of leading entries that were read from file */
  /* candidate strings; each element is an allocated string */
  char **cands;

  /* this array was used and merged with okuri-nasi entry array */
  int is_used;
  /* link to its parent line */
  struct skk_line *line;
};
109
/* skk_line state: bit flags stored in skk_line.state */
#define SKK_LINE_NEED_SAVE (1<<0)
#define SKK_LINE_USE_FOR_COMPLETION (1<<1)

/* skk dictionary line */
struct skk_line {
  /* line index. head part */
  char *head;
  /* line index. okurigana part. value will be 0 if it is okuri-nasi
     entry */
  char okuri_head;
  /* array of candidate arrays, one per okurigana; element 0 is the
     array used when no okurigana is given */
  int nr_cand_array;
  struct skk_cand_array *cands;
  /* state of line (combination of SKK_LINE_* flags) */
  int state;
  /* link to next entry in the list */
  struct skk_line *next;
};
129
/* skk dictionary file */
typedef struct dic_info_ {
  /* address of mmap'ed dictionary file (NULL when nothing is mapped) */
  void *addr;
  /* byte offset of first valid entry in mmap'ed region */
  int first;
  /* byte offset of first okuri-nasi entry */
  int border;
  /* size of dictionary file in bytes */
  int size;
  /* head of cached skk dictionary line list. LRU ordered.
     Only head.next is used as the list anchor. */
  struct skk_line head;
  /* timestamp of personal dictionary */
  time_t personal_dic_timestamp;
  /* whether cached lines are modified or not */
  int cache_modified;
  /* number of cached lines */
  int cache_len;
  /* skkserv related state (combination of SKK_SERV_* flags) */
  int skkserv_state;
  /* skkserv hostname */
  char *skkserv_hostname;
  /* skkserv port number */
  int skkserv_portnum;
  /* skkserv address family. AF_UNSPEC or AF_INET or AF_INET6 */
  int skkserv_family;
  /* timeout (milliseconds) for skkserv completion */
  int skkserv_completion_timeout;
} dic_info;
159
/* completion */
struct skk_comp_array {
  /* index of completion */
  char *head;
  /* array of completion strings; nr_comps is presumably its length */
  int nr_comps;
  char **comps;
  /* reference count (its users are outside this part of the file) */
  int refcount;
  /* link to the next array; presumably the list is anchored at skk_comp */
  struct skk_comp_array *next;
} *skk_comp;
172
/* XXX should create skk.h */
/* Forward declarations of functions that are used before their definition. */
static uim_lisp skk_replace_numeric(uim_lisp head_);

static uim_lisp restore_numeric(const char *s, uim_lisp numlst_);
static char *replace_numeric(const char *str);
static char *sanitize_word(const char *str, const char *prefix);
static int is_purged_cand(const char *str);
static void merge_purged_cands(dic_info *skk_dic,
		struct skk_cand_array *src_ca,
		struct skk_cand_array *dst_ca, int src_nth, int dst_nth);
static void merge_purged_cand_to_dst_array(dic_info *skk_dic,
		struct skk_cand_array *src_ca,
		struct skk_cand_array *dst_ca, char *purged_cand);
static void update_personal_dictionary_cache_with_file(dic_info *skk_dic,
		const char *fn, int is_personal);
static void look_get_comp(struct skk_comp_array *ca, const char *str);
static uim_lisp look_get_top_word(const char *str);
static char *quote_word(const char *word, const char *prefix);
191
/* skkserv connection */
/* size of the receive buffer used when reading a reply from skkserv */
#define SKK_SERV_BUFSIZ 1024
/* bit flags stored in dic_info.skkserv_state */
#define SKK_SERV_USE (1<<0)
#define SKK_SERV_CONNECTED (1<<1)
#define SKK_SERV_TRY_COMPLETION (1<<2)

/* socket connected to skkserv; replies are read from it directly */
static int skkservsock = -1;
/* stdio streams for the server; requests are written through wserv */
static FILE *rserv, *wserv;
/* prototype */
static int open_skkserv(const char *hostname, int portnum, int family);
static void close_skkserv(void);
static void skkserv_disconnected(dic_info *di);

/* state for the "look" based lookup (used outside this part of the file) */
static int use_look = 0;
static uim_look_ctx *skk_look_ctx = NULL;

/* result of uim_helper_is_setugid(), set in skk_dic_open() */
static uim_bool is_setugid;
209
/*
 * Return the number of bytes from s up to, but not including, the end of
 * the current line.
 *
 * A line normally ends at '\n'.  A NUL byte is also treated as the end of
 * the line so that data without a trailing newline does not make the scan
 * run on indefinitely.
 */
static int
calc_line_len(const char *s)
{
  int i = 0;

  while (s[i] != '\n' && s[i] != '\0')
    i++;
  return i;
}
217
/*
 * Tell whether the dictionary line starting at line_str is an okuri-ari
 * entry.
 *
 * The index part is the text before the first space of the line.  The
 * entry is okuri-ari when the last character of the index is an ASCII
 * letter and the first character is either non-ASCII or '>'.
 *
 * The search for the space is confined to the current line (it stops at
 * '\n' or NUL), so a malformed line without a space is never judged by
 * the contents of a following line.
 *
 * Returns 1 for an okuri-ari entry, 0 otherwise.
 */
static int
is_okuri(const char *line_str)
{
  size_t idx_len;

  /* length of the index part within this line */
  idx_len = strcspn(line_str, " \n");
  if (idx_len == 0 || line_str[idx_len] != ' ')
    return 0;

  /* check the character just before the space */
  if (skk_isalpha(line_str[idx_len - 1]) &&
      (!skk_isascii(line_str[0]) || line_str[0] == '>'))
    return 1;
  return 0;
}
232
233 static int
find_first_line(dic_info * di)234 find_first_line(dic_info *di)
235 {
236 char *s = di->addr;
237 int off = 0;
238
239 while (off < di->size && s[off] == ';') {
240 int l = calc_line_len(&s[off]);
241 off += l + 1;
242 }
243 return off;
244 }
245
246 static int
find_border(dic_info * di)247 find_border(dic_info *di)
248 {
249 char *s = di->addr;
250 int off = 0;
251 while (off < di->size) {
252 int l = calc_line_len(&s[off]);
253 if (s[off] == ';') {
254 off += l + 1;
255 continue;
256 }
257 if (!is_okuri(&s[off]))
258 return off;
259 off += l + 1;
260 }
261 /* every entry is okuri-ari, it may not happen. */
262 return di->size - 1;
263 }
264
265 static dic_info *
open_dic(const char * fn,uim_bool use_skkserv,const char * skkserv_hostname,int skkserv_portnum,int skkserv_family)266 open_dic(const char *fn, uim_bool use_skkserv, const char *skkserv_hostname,
267 int skkserv_portnum, int skkserv_family)
268 {
269 dic_info *di;
270 struct stat st;
271 int fd;
272 void *addr = NULL;
273 int mmap_done = 0;
274
275 di = (dic_info *)uim_malloc(sizeof(dic_info));
276
277 di->skkserv_hostname = NULL;
278 if (use_skkserv) {
279 di->skkserv_hostname = uim_strdup(skkserv_hostname);
280 di->skkserv_portnum = skkserv_portnum;
281 di->skkserv_family = skkserv_family;
282 di->skkserv_state = SKK_SERV_USE | open_skkserv(skkserv_hostname,
283 skkserv_portnum,
284 skkserv_family);
285 di->skkserv_completion_timeout = uim_scm_symbol_value_int("skk-skkserv-completion-timeout");
286 } else {
287 di->skkserv_state = 0;
288 fd = open(fn, O_RDONLY);
289 if (fd != -1) {
290 if (fstat(fd, &st) != -1) {
291 addr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
292 if (addr != MAP_FAILED) {
293 mmap_done = 1;
294 }
295 }
296 close(fd);
297 }
298 }
299
300 di->addr = mmap_done ? addr : NULL;
301 di->size = mmap_done ? st.st_size : 0;
302 di->first = mmap_done ? find_first_line(di) : 0;
303 di->border = mmap_done ? find_border(di) : 0;
304
305 di->head.next = NULL;
306 di->personal_dic_timestamp = 0;
307 di->cache_modified = 0;
308 di->cache_len = 0;
309
310 return di;
311 }
312
313 static const char *
find_line(dic_info * di,int off)314 find_line(dic_info *di, int off)
315 {
316 char *ptr = di->addr;
317 while (off > 0 && (ptr[off] != '\n' || ptr[off + 1] == ';'))
318 off--;
319
320 if (off)
321 off++;
322
323 return &ptr[off];
324 }
325
326 static char *
extract_line_index(dic_info * di,int off,char * buf,int len)327 extract_line_index(dic_info *di, int off, char *buf, int len)
328 {
329 const char *p = find_line(di, off);
330 int i;
331 if (p[0] == ';')
332 return NULL;
333
334 for (i = 0; i < len && p[i] != ' '; i++)
335 buf[i] = p[i];
336 buf[i] = '\0';
337
338 return buf;
339 }
340
341 static int
do_search_line(dic_info * di,const char * s,int min,int max,int d)342 do_search_line(dic_info *di, const char *s, int min,
343 int max, int d)
344 {
345 char buf[256];
346 char *r;
347 int idx = ((unsigned int)min + (unsigned int)max) >> 1;
348 int c = 0;
349
350 if (abs(max - min) < 4)
351 return -1;
352
353 r = extract_line_index(di, idx, buf, 256);
354 if (r)
355 c = strcmp(s, r);
356 else
357 return -1;
358
359 if (!c)
360 return idx;
361
362 if (c * d > 0)
363 return do_search_line(di, s, idx, max, d);
364 else
365 return do_search_line(di, s, min, idx, d);
366
367 return -1;
368 }
369
/* This function name is temporary. I want a better name. */
/*
 * Return a pointer to the first ' ' in str, or to the terminating NUL
 * when str contains no space.
 */
static char *
first_space(char *str)
{
  char *sp = strchr(str, ' ');

  if (sp == NULL)
    sp = str + strlen(str);
  return sp;
}
379
/*
 * Return a pointer to the '/' that ends the candidate starting at str, or
 * to the terminating NUL when there is none.
 *
 * When the candidate begins with '[', slashes are skipped until the
 * bracket is closed by a ']' that is immediately followed by '/'.
 */
static char *
next_cand_slash(char *str)
{
  char *cur;
  int in_bracket = 0;

  for (cur = str; *cur != '\0'; cur++) {
    if (*cur == '/' && !in_bracket)
      break;

    /* only a bracket at the very first position opens a group */
    if (cur == str && *cur == '[')
      in_bracket = 1;

    if (in_bracket && *cur == ']' && cur[1] == '/')
      in_bracket = 0;
  }
  return cur;
}
398
/*
 * Return a pointer to the first '/' in str, or to the terminating NUL
 * when str contains no slash.
 */
static char *
next_slash_in_bracket(char *str)
{
  char *slash = strchr(str, '/');

  return slash ? slash : str + strlen(str);
}
407
/*
 * Extract the okurigana that leads a bracketed group.
 *
 * str points just after the opening '['.  The text up to the first '/'
 * is returned as a newly allocated string that the caller must free.
 * Returns NULL when str is NULL or contains no '/'.
 */
static char *
okuri_in_bracket(char *str)
{
  char *copy, *slash;

  if (str == NULL)
    return NULL;

  copy = uim_strdup(str);
  slash = next_slash_in_bracket(copy);

  if (*slash != '\0') {
    /* cut the copy at the slash: what remains is the okurigana */
    *slash = '\0';
    return copy;
  }

  /* this is not the bracket used for skk-henkan-strict-okuri-precedence */
  free(copy);
  return NULL;
}
428
429 static char *
nth_candidate(char * str,int nth)430 nth_candidate(char *str, int nth)
431 {
432 char *p, *term;
433 int i;
434
435 str = first_space(str);
436 for (i = 0; i <= nth; i++) {
437 str = next_cand_slash(str);
438 if (*str == '/')
439 str++;
440 }
441
442 if (*str == '\0')
443 return NULL;
444
445 p = uim_strdup(str);
446 term = next_cand_slash(p);
447 *term = '\0';
448 return p;
449 }
450
451 static void
free_skk_line(struct skk_line * sl)452 free_skk_line(struct skk_line *sl)
453 {
454 int i, j;
455
456 if (!sl)
457 return ;
458
459 for (i = 0; i < sl->nr_cand_array; i++) {
460 struct skk_cand_array *ca = &sl->cands[i];
461 for (j = 0; j < ca->nr_cands; j++)
462 free(ca->cands[j]);
463 free(ca->okuri);
464 free(ca->cands);
465 }
466 free(sl->head);
467 free(sl->cands);
468 free(sl);
469 }
470
471 static void
free_skk_dic(dic_info * skk_dic)472 free_skk_dic(dic_info *skk_dic)
473 {
474 if (skk_dic) {
475 struct skk_line *sl, *tmp;
476
477 if (skk_dic->addr)
478 munmap(skk_dic->addr, skk_dic->size);
479
480 sl = skk_dic->head.next;
481 while (sl) {
482 tmp = sl;
483 sl = sl->next;
484 free_skk_line(tmp);
485 }
486
487 if (skk_dic->skkserv_state & SKK_SERV_CONNECTED)
488 close_skkserv();
489 free(skk_dic->skkserv_hostname);
490
491 free(skk_dic);
492 }
493 }
494
495 /* init */
496 static uim_lisp
skk_dic_open(uim_lisp fn_,uim_lisp use_skkserv_,uim_lisp skkserv_hostname_,uim_lisp skkserv_portnum_,uim_lisp skkserv_family_)497 skk_dic_open(uim_lisp fn_, uim_lisp use_skkserv_, uim_lisp skkserv_hostname_,
498 uim_lisp skkserv_portnum_, uim_lisp skkserv_family_)
499 {
500 const char *fn, *skkserv_hostname, *skkserv_family_str;
501 uim_bool use_skkserv;
502 int skkserv_portnum, skkserv_family;
503 dic_info *skk_dic;
504
505 fn = REFER_C_STR(fn_);
506 use_skkserv = C_BOOL(use_skkserv_);
507 skkserv_hostname = REFER_C_STR(skkserv_hostname_);
508 skkserv_portnum = C_INT(skkserv_portnum_);
509 skkserv_family_str = REFER_C_STR(skkserv_family_);
510
511 is_setugid = uim_helper_is_setugid();
512 signal(SIGPIPE, SIG_IGN);
513
514 skkserv_family = AF_UNSPEC;
515 if (skkserv_family_str) {
516 if (!strcmp(skkserv_family_str, "inet"))
517 skkserv_family = AF_INET;
518 else if (!strcmp(skkserv_family_str, "inet6"))
519 skkserv_family = AF_INET6;
520 }
521
522 skk_dic = open_dic(fn, use_skkserv, skkserv_hostname, skkserv_portnum,
523 skkserv_family);
524
525 return MAKE_PTR(skk_dic);
526 }
527
528 static uim_lisp
skk_free_dic(uim_lisp skk_dic_)529 skk_free_dic(uim_lisp skk_dic_)
530 {
531 dic_info *skk_dic = NULL;
532
533 if (PTRP(skk_dic_))
534 skk_dic = C_PTR(skk_dic_);
535
536 free_skk_dic(skk_dic);
537
538 return uim_scm_f();
539 }
540
541 static struct skk_cand_array *
find_candidate_array_from_line(struct skk_line * sl,const char * okuri,int create_if_notfound)542 find_candidate_array_from_line(struct skk_line *sl, const char *okuri,
543 int create_if_notfound)
544 {
545 int i;
546 struct skk_cand_array *ca;
547
548 if (!okuri || !strlen(okuri))
549 return &sl->cands[0];
550
551 for (i = 1; i < sl->nr_cand_array; i++) {
552 if (okuri && !strcmp(okuri, sl->cands[i].okuri))
553 return &sl->cands[i];
554 }
555
556 if (!create_if_notfound)
557 return &sl->cands[0];
558
559 /* allocate now */
560 sl->nr_cand_array++;
561 sl->cands = uim_realloc(sl->cands,
562 sizeof(struct skk_cand_array) * sl->nr_cand_array);
563 ca = &sl->cands[sl->nr_cand_array - 1];
564 ca->is_used = 0;
565 ca->cands = NULL;
566 ca->nr_cands = 0;
567 ca->nr_real_cands = 0;
568 ca->okuri = uim_strdup(okuri);
569 ca->line = sl;
570 return ca;
571 }
572
573 static void
push_back_candidate_to_array(struct skk_cand_array * ca,const char * cand)574 push_back_candidate_to_array(struct skk_cand_array *ca, const char *cand)
575 {
576 ca->nr_cands++;
577 if (ca->cands)
578 ca->cands = uim_realloc(ca->cands, sizeof(char *) * ca->nr_cands);
579 else
580 ca->cands = uim_malloc(sizeof(char *));
581 ca->cands[ca->nr_cands - 1] = uim_strdup(cand);
582 }
583
/*
 * Merge the candidates of the first candidate array of sl (sl->cands[0])
 * into dst_ca.
 *
 * Nothing is done when sl is NULL or when dst_ca is that first array
 * itself.  Each source candidate that is not yet present in dst_ca is
 * handled as follows:
 *  - the source candidate is a purged entry (only checked inside the
 *    nr_real_cands part) and dst_ca already holds a purged entry:
 *    merge_purged_cands() combines them;
 *  - the source candidate is a purged entry and dst_ca holds none:
 *    merge_purged_cand_to_dst_array() is called;
 *  - otherwise the candidate is appended with
 *    push_back_candidate_to_array().
 */
static void
merge_base_candidates_to_array(dic_info *skk_dic,
			       struct skk_line *sl,
			       struct skk_cand_array *dst_ca)
{
  int i, j;
  struct skk_cand_array *src_ca;

  if (!sl)
    return ;

  src_ca = &sl->cands[0];
  if (src_ca == dst_ca)
    return ;

  for (i = 0; i < src_ca->nr_cands; i++) {
    int dup = 0;
    int src_purged_cand_index = -1;
    int dst_purged_cand_index = -1;

    /* only entries in the "real" part can be purged entries */
    if (i < src_ca->nr_real_cands && is_purged_cand(src_ca->cands[i]))
      src_purged_cand_index = i;

    /* one pass over dst: remember its first purged entry and detect
       whether the source candidate is already there */
    for (j = 0; j < dst_ca->nr_cands; j++) {
      if (dst_purged_cand_index == -1 && is_purged_cand(dst_ca->cands[j]))
	dst_purged_cand_index = j;
      if (!strcmp(src_ca->cands[i], dst_ca->cands[j])) {
	dup = 1;
      }
    }
    if (!dup) {
      if (src_purged_cand_index != -1 && dst_purged_cand_index != -1)
	merge_purged_cands(skk_dic, src_ca, dst_ca, src_purged_cand_index,
			dst_purged_cand_index);
      else if (src_purged_cand_index != -1 && dst_purged_cand_index == -1)
	merge_purged_cand_to_dst_array(skk_dic, src_ca, dst_ca,
			src_ca->cands[src_purged_cand_index]);
#if 0
      /*
       * Just adding words subsequent to real_cands
       * (push_back_candidate_to_array) is enough.
       */
      else if (src_purged_cand_index == -1 && dst_purged_cand_index != -1)
	merge_word_to_dst_cand_array_with_purged_words(dst_ca,
			src_ca, src_ca->cands[i]);
#endif
      else
	push_back_candidate_to_array(dst_ca, src_ca->cands[i]);
    }
  }
}
635
636 static void
compose_line_parts(dic_info * di,struct skk_line * sl,char * okuri,char * line)637 compose_line_parts(dic_info *di, struct skk_line *sl,
638 char *okuri, char *line)
639 {
640 int nth;
641 char *tmp;
642 struct skk_cand_array *ca = find_candidate_array_from_line(sl, okuri, 1);
643
644 nth = 0;
645 do {
646 tmp = nth_candidate(line, nth);
647 if (tmp) {
648 if (tmp[0] == '[') {
649 char *str = okuri_in_bracket(&tmp[1]);
650 if (!str) {
651 /*
652 * this is not the bracket used for
653 * skk-henkan-strict-okuri-precedence
654 */
655 char *quoted = quote_word(tmp, "(concat \"");
656 push_back_candidate_to_array(ca, quoted);
657 free(quoted);
658 } else {
659 tmp[0] = ' '; /* create first_space */
660 compose_line_parts(di, sl, str, &tmp[0]);
661 free(str);
662 }
663 } else if (tmp[0] != ']') {
664 push_back_candidate_to_array(ca, tmp);
665 }
666 nth++;
667 free(tmp);
668 } else {
669 break;
670 }
671 } while (1);
672 }
673
674 static struct skk_line *
alloc_skk_line(const char * word,char okuri_head)675 alloc_skk_line(const char *word, char okuri_head)
676 {
677 struct skk_line *sl;
678 sl = uim_malloc(sizeof(struct skk_line));
679 sl->state = 0;
680 sl->head = uim_strdup(word);
681 sl->okuri_head = okuri_head;
682 sl->nr_cand_array = 1;
683 sl->cands = uim_malloc(sizeof(struct skk_cand_array));
684 sl->cands[0].okuri = NULL;
685 sl->cands[0].cands = NULL;
686 sl->cands[0].nr_cands = 0;
687 sl->cands[0].nr_real_cands = 0;
688 sl->cands[0].is_used = 0;
689 sl->cands[0].line = sl;
690 return sl;
691 }
692
693 static struct skk_line *
copy_skk_line(struct skk_line * p)694 copy_skk_line(struct skk_line *p)
695 {
696 int i, j;
697 struct skk_line *sl;
698
699 if (!p)
700 return NULL;
701
702 sl = uim_malloc(sizeof(struct skk_line));
703 sl->state = p->state;
704 sl->head = uim_strdup(p->head);
705 sl->okuri_head = p->okuri_head;
706 sl->nr_cand_array = p->nr_cand_array;
707 sl->cands = uim_malloc(sizeof(struct skk_cand_array) * sl->nr_cand_array);
708 for (i = 0; i < sl->nr_cand_array; i++) {
709 struct skk_cand_array *ca = &sl->cands[i];
710 struct skk_cand_array *q = &p->cands[i];
711
712 ca->okuri = q->okuri ? uim_strdup(q->okuri) : NULL;
713 ca->nr_cands = q->nr_cands;
714 ca->nr_real_cands = q->nr_real_cands;
715 ca->cands = uim_malloc(sizeof(char *) * ca->nr_cands);
716 for (j = 0; j < ca->nr_cands; j++)
717 ca->cands[j] = uim_strdup(q->cands[j]);
718 ca->is_used = q->is_used;
719 ca->line = sl;
720 }
721 sl->next = NULL;
722 return sl;
723 }
724
725 /*
726 * Compose skk line
727 */
728 static struct skk_line *
compose_line(dic_info * di,const char * word,char okuri_head,char * entry)729 compose_line(dic_info *di, const char *word, char okuri_head, char *entry)
730 {
731 struct skk_line *sl;
732
733 sl = alloc_skk_line(word, okuri_head);
734 /* parse */
735 compose_line_parts(di, sl, NULL, entry);
736
737 return sl;
738 }
739
740 static void
add_line_to_cache_head(dic_info * di,struct skk_line * sl)741 add_line_to_cache_head(dic_info *di, struct skk_line *sl)
742 {
743 sl->next = di->head.next;
744 di->head.next = sl;
745
746 di->cache_len++;
747 di->cache_modified = 1;
748 }
749
750 static void
move_line_to_cache_head(dic_info * di,struct skk_line * sl)751 move_line_to_cache_head(dic_info *di, struct skk_line *sl)
752 {
753 struct skk_line *prev;
754
755 if (di->head.next == sl)
756 return;
757
758 prev = di->head.next;
759 while (prev->next != sl) {
760 prev = prev->next;
761 }
762 prev->next = sl->next;
763 sl->next = di->head.next;
764 di->head.next = sl;
765
766 di->cache_modified = 1;
767 }
768
#if 0
/*
 * Compiled out.  Append sl at the tail of the cache list of di, count it
 * in cache_len and mark the cache as modified.
 */
static void
add_line_to_cache_last(dic_info *di, struct skk_line *sl)
{
  struct skk_line *prev;

  if (di->head.next == NULL)
    di->head.next = sl;
  else {
    /* walk to the current last line */
    prev = di->head.next;
    while (prev->next) {
      prev = prev->next;
    }
    prev->next = sl;
  }
  sl->next = NULL;

  di->cache_len++;
  di->cache_modified = 1;
}
#endif
790
791 static struct skk_line *
search_line_from_server(dic_info * di,const char * s,char okuri_head)792 search_line_from_server(dic_info *di, const char *s, char okuri_head)
793 {
794 char r;
795 struct skk_line *sl;
796 int n = 0, ret, len;
797 char buf[SKK_SERV_BUFSIZ];
798 char *line, *idx;
799 ssize_t nr;
800
801 if (!(di->skkserv_state & SKK_SERV_CONNECTED)) {
802 if (!((di->skkserv_state |= open_skkserv(di->skkserv_hostname,
803 di->skkserv_portnum,
804 di->skkserv_family)) &
805 SKK_SERV_CONNECTED))
806 return NULL;
807 }
808
809 uim_asprintf(&idx, "%s%c", s, okuri_head);
810
811 fprintf(wserv, "1%s \n", idx);
812 ret = fflush(wserv);
813 if (ret != 0 && errno == EPIPE) {
814 free(idx);
815 skkserv_disconnected(di);
816 return NULL;
817 }
818
819 uim_asprintf(&line, "%s ", idx);
820 free(idx);
821
822 if ((nr = read(skkservsock, &r, 1)) == -1 || nr == 0) {
823 skkserv_disconnected(di);
824 free(line);
825 return NULL;
826 }
827
828 if (r == '1') { /* succeeded */
829 while (1) {
830 if ((nr = read(skkservsock, &r, 1)) == -1 || nr == 0) {
831 skkserv_disconnected(di);
832 free(line);
833 return NULL;
834 }
835
836 if (r == '\n') {
837 len = strlen(line) + n;
838 line = uim_realloc(line, len + 1);
839 strlcat(line, buf, len + 1);
840 break;
841 }
842
843 buf[n] = r;
844 buf[n + 1] = '\0';
845 if (n == SKK_SERV_BUFSIZ - 2) {
846 len = strlen(line) + n + 1;
847 line = uim_realloc(line, len + 1);
848 strlcat(line, buf, len + 1);
849 n = 0;
850 } else {
851 n++;
852 }
853 }
854 sl = compose_line(di, s, okuri_head, line);
855 free(line);
856 return sl;
857 } else {
858 while ((nr = read(skkservsock, &r, 1)) != -1 && nr != 0 && r != '\n')
859 ;
860 free(line);
861 return NULL;
862 }
863 }
864
865 static struct skk_line *
search_line_from_file(dic_info * di,const char * s,char okuri_head)866 search_line_from_file(dic_info *di, const char *s, char okuri_head)
867 {
868 int n;
869 const char *p;
870 int len;
871 char *line, *idx;
872 struct skk_line *sl;
873
874 if (!di->addr)
875 return NULL;
876
877 uim_asprintf(&idx, "%s%c", s, okuri_head);
878
879 if (okuri_head)
880 n = do_search_line(di, idx, di->first, di->border - 1, -1);
881 else
882 n = do_search_line(di, idx, di->border, di->size - 1, 1);
883
884 free(idx);
885
886 if (n == -1)
887 return NULL;
888
889 p = find_line(di, n);
890 len = calc_line_len(p);
891 line = uim_malloc(len + 1);
892 /* strncat is used intentionally because *p is too long string */
893 line[0] = '\0';
894 strncat(line, p, len);
895 sl = compose_line(di, s, okuri_head, line);
896 free(line);
897 return sl;
898 }
899
900 static struct skk_line *
search_line_from_cache(dic_info * di,const char * s,char okuri_head)901 search_line_from_cache(dic_info *di, const char *s, char okuri_head)
902 {
903 struct skk_line *sl;
904
905 if (!di)
906 return NULL;
907
908 /* search from cache */
909 for (sl = di->head.next; sl; sl = sl->next) {
910 if (!strcmp(sl->head, s) && sl->okuri_head == okuri_head)
911 return sl;
912 }
913 return NULL;
914 }
915
916
917 static struct skk_cand_array *
find_cand_array(dic_info * di,const char * s,char okuri_head,const char * okuri,int create_if_not_found)918 find_cand_array(dic_info *di, const char *s,
919 char okuri_head, const char *okuri,
920 int create_if_not_found)
921 {
922 struct skk_line *sl, *sl_file;
923 struct skk_cand_array *ca;
924 int from_file = 0;
925
926 if (!di)
927 return NULL;
928
929 sl = search_line_from_cache(di, s, okuri_head);
930 if (!sl) {
931 if (di->skkserv_state & SKK_SERV_USE)
932 sl = search_line_from_server(di, s, okuri_head);
933 else
934 sl = search_line_from_file(di, s, okuri_head);
935 if (!sl) {
936 if (!create_if_not_found)
937 return NULL;
938 sl = alloc_skk_line(s, okuri_head);
939 }
940 from_file = 1;
941 add_line_to_cache_head(di, sl);
942 }
943
944 ca = find_candidate_array_from_line(sl, okuri, create_if_not_found);
945
946 if (!ca->is_used) {
947 merge_base_candidates_to_array(di, sl, ca);
948 ca->is_used = 1;
949 if (!from_file) {
950 if (di->skkserv_state & SKK_SERV_USE) {
951 sl_file = search_line_from_server(di, s, okuri_head);
952 if (!(di->skkserv_state & SKK_SERV_CONNECTED))
953 ca->is_used = 0;
954 } else
955 sl_file = search_line_from_file(di, s, okuri_head);
956 merge_base_candidates_to_array(di, sl_file, ca);
957 free_skk_line(sl_file);
958 }
959 }
960
961 return ca;
962 }
963
964 static struct skk_cand_array *
find_cand_array_lisp(dic_info * skk_dic,uim_lisp head_,uim_lisp okuri_head_,uim_lisp okuri_,int create_if_not_found,uim_lisp numeric_conv_)965 find_cand_array_lisp(dic_info *skk_dic, uim_lisp head_, uim_lisp okuri_head_,
966 uim_lisp okuri_, int create_if_not_found,
967 uim_lisp numeric_conv_)
968 {
969 char o;
970 const char *hs;
971 const char *okuri = NULL;
972 struct skk_cand_array *ca;
973 char *rs = NULL;
974
975 hs = REFER_C_STR(head_);
976
977 if (TRUEP(numeric_conv_))
978 rs = replace_numeric(hs);
979
980 if (okuri_ != uim_scm_null())
981 okuri = REFER_C_STR(okuri_);
982
983 if (okuri_head_ == uim_scm_null()) {
984 o = '\0';
985 } else {
986 const char *os = REFER_C_STR(okuri_head_);
987 o = os[0];
988 }
989
990 if (!rs)
991 ca = find_cand_array(skk_dic, hs, o, okuri, create_if_not_found);
992 else {
993 ca = find_cand_array(skk_dic, rs, o, okuri, create_if_not_found);
994 free(rs);
995 }
996
997 return ca;
998 }
999
/*
 * purged_cand: /(skk-ignore-dic-word "foo" "bar" ...)/
 * purged_words: {"foo", "bar", ..., NULL}
 *
 * Return 1 when str begins with "(skk-ignore-dic-word " (including the
 * trailing space), 0 otherwise.
 */
static int
is_purged_cand(const char *str)
{
  static const char marker[] = "(skk-ignore-dic-word ";

  return strncmp(str, marker, sizeof(marker) - 1) == 0 ? 1 : 0;
}
1015
/*
 * Decode the backslash escapes in p and return the result as a newly
 * allocated string.
 *
 * "\\" yields a backslash, "\n" a newline, "\r" a carriage return, and a
 * backslash followed by one to three octal digits (0-7) yields the byte
 * with that value.  Any other escaped character is copied without the
 * backslash.  A lone backslash at the end of the string is dropped.
 *
 * Returns NULL after reporting through uim_notify_fatal() when the string
 * ends while further octal digits could still follow, or when the result
 * does not fit into a BUFSIZ sized buffer.
 */
static char *
expand_str(const char *p)
{
  char buf[BUFSIZ];
  int i = 0;
  int c, n, ndigits;

  while (*p != '\0') {
    c = *p;
    if (c == '\\') {
      p++;
      c = *p;
      if (c == '\0')
	break;
      switch (c) {
      case '\\':
	c = '\\';
	break;
      case 'n':
	c = '\n';
	break;
      case 'r':
	c = '\r';
	break;
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
	/* octal escape: accumulate up to three digits in n */
	n = c - '0';
	ndigits = 1;
	while (ndigits < 3) {
	  p++;
	  c = *p;
	  if (*p == '\0') {
	    uim_notify_fatal(N_("uim-skk: error in expand_str"));
	    return NULL;
	  }
	  if (c >= '0' && c <= '7') {
	    n = n * 8 + c - '0';
	    ndigits++;
	  } else {
	    /* not a digit: step back so the outer loop sees it again */
	    p--;
	    break;
	  }
	}
	c = n;
      }
    }
    /* keep one byte free for the terminator */
    if ((i + 1) >= BUFSIZ) {
      uim_notify_fatal(N_("uim-skk: too long word"));
      return NULL;
    }
    buf[i] = c;
    i++;
    p++;
  }
  buf[i] = '\0';
  return uim_strdup(buf);
}
1079
/*
 * Extract the quoted words of a purged candidate such as
 *   (skk-ignore-dic-word "foo" "bar")
 * and return them as a NULL-terminated array of newly allocated strings.
 *
 * Each word is passed through expand_str(); when that fails, the raw text
 * between the quotes is stored instead.  A '"' preceded by a backslash
 * does not open or close a word.
 *
 * Returns NULL when str does not contain "(skk-ignore-dic-word", when no
 * space follows it, or when no complete quoted word is found.  The result
 * is released with free_allocated_purged_words().
 */
static char **
get_purged_words(const char *str)
{
  char *p;
  char **words = NULL;
  char *word = NULL;
  int nr = 0;
  int open = 0;
  int len = 0;

  p = strstr(str, "(skk-ignore-dic-word");
  if (!p)
    return NULL;

  p = first_space(p);
  if (*p == '\0')
    return NULL;
  p++;

  while (*p != '\0') {
    if (*p == '"' && p[-1] != '\\') {
      open = open ? 0 : 1;
      if (open) {
	/* opening quote: the word starts at the next character */
	p++;
	word = p;
	len = 0;
	/*
	 * NOTE(review): p is advanced again at the bottom of the loop, so
	 * the first character of the word is never tested for '"'.  An
	 * empty word ("") therefore appears not to be closed here --
	 * confirm whether that input can occur.
	 */
      } else {
	/* closing quote: word[0..len-1] is the raw word */
	char *orig = uim_malloc(len + 1);
	char *expanded_word;

	nr++;
	if (words)
	  words = uim_realloc(words, sizeof(char *) * nr);
	else
	  words = uim_malloc(sizeof(char *));
	strlcpy(orig, word, len + 1);

	expanded_word = expand_str(orig);
	if (expanded_word)
	  words[nr - 1] = expanded_word;
	else
	  words[nr - 1] = uim_strdup(orig);
	free(orig);
      }
    }
    p++;
    len++;
  }
  if (words) {
    /* add the terminating NULL element */
    words = uim_realloc(words, sizeof(char *) * (nr + 1));
    words[nr] = NULL;
  }
  return words;
}
1134
/*
 * Return the number of strings in the NULL-terminated array p.  A NULL
 * array counts as empty.
 */
static int
nr_purged_words(char **p)
{
  int count = 0;

  if (p != NULL) {
    for (; p[count] != NULL; count++)
      ;
  }
  return count;
}
1144
/*
 * Free every string of the NULL-terminated array p and then the array
 * itself.  A NULL argument is ignored.
 */
static void
free_allocated_purged_words(char **p)
{
  char **cur;

  if (p == NULL)
    return;

  for (cur = p; *cur != NULL; cur++)
    free(*cur);
  free(p);
}
1159
1160 static int
is_purged_only(struct skk_cand_array * ca)1161 is_purged_only(struct skk_cand_array *ca)
1162 {
1163 int i, j;
1164 char **purged_words;
1165
1166 if (ca->nr_real_cands > 1)
1167 return 0;
1168
1169 if ((purged_words = get_purged_words(ca->cands[0])) != NULL) {
1170 int nr_purged = nr_purged_words(purged_words);
1171 /* going to compare words beyond nr_real_cands */
1172 for (i = ca->nr_real_cands; i < ca->nr_cands; i++) {
1173 for (j = 0; j < nr_purged; j++) {
1174 /* return false if there is any different candidate */
1175 if (strcmp(ca->cands[i], purged_words[j])) {
1176 free_allocated_purged_words(purged_words);
1177 return 0;
1178 }
1179 }
1180 }
1181 free_allocated_purged_words(purged_words);
1182 return 1;
1183 }
1184 return 0;
1185 }
1186
/*
 * Return 1 when n occurs in indices, a list of ints terminated by -1;
 * return 0 otherwise.
 */
static int
match_to_discarding_index(int indices[], int n)
{
  const int *cur;

  for (cur = indices; *cur != -1; cur++) {
    if (*cur == n)
      return 1;
  }
  return 0;
}
1198
1199 static uim_lisp
skk_get_entry(uim_lisp skk_dic_,uim_lisp head_,uim_lisp okuri_head_,uim_lisp okuri_,uim_lisp numeric_conv_)1200 skk_get_entry(uim_lisp skk_dic_, uim_lisp head_, uim_lisp okuri_head_,
1201 uim_lisp okuri_, uim_lisp numeric_conv_)
1202 {
1203 struct skk_cand_array *ca;
1204 dic_info *skk_dic = NULL;
1205
1206 if (PTRP(skk_dic_))
1207 skk_dic = C_PTR(skk_dic_);
1208
1209 ca = find_cand_array_lisp(skk_dic, head_, okuri_head_, okuri_, 0, numeric_conv_);
1210
1211 if (ca && ca->nr_cands > 0 && !is_purged_only(ca))
1212 return uim_scm_t();
1213
1214 if (TRUEP(numeric_conv_))
1215 return skk_get_entry(skk_dic_, head_, okuri_head_, okuri_, uim_scm_f());
1216
1217 return uim_scm_f();
1218 }
1219
1220 static uim_lisp
skk_store_replaced_numeric_str(uim_lisp head_)1221 skk_store_replaced_numeric_str(uim_lisp head_)
1222 {
1223 const char *str;
1224 int len;
1225
1226 int prev_is_num = 0;
1227 int i, numlen = 0, start = 0;
1228 char *numstr = NULL;
1229 uim_lisp lst = uim_scm_null();
1230
1231 str = REFER_C_STR(head_);
1232 len = strlen(str);
1233
1234 for (i = 0; i < len; i++) {
1235 if (isdigit((unsigned char)str[i])) {
1236 if (prev_is_num == 0) {
1237 start = i;
1238 numlen = 1;
1239 } else {
1240 numlen++;
1241 }
1242 prev_is_num = 1;
1243 } else {
1244 if (prev_is_num) {
1245 /* add number into list */
1246 if (!numstr)
1247 numstr = uim_malloc(numlen + 1);
1248 else
1249 numstr = uim_realloc(numstr, numlen + 1);
1250 strlcpy(numstr, &str[start], numlen + 1);
1251 lst = CONS(MAKE_STR(numstr), lst);
1252 }
1253 prev_is_num = 0;
1254 }
1255 }
1256
1257 /*
1258 * Add last number into list if string is ended with numeric
1259 * character.
1260 */
1261 if (prev_is_num) {
1262 if (!numstr)
1263 numstr = uim_malloc(numlen + 1);
1264 else
1265 numstr = uim_realloc(numstr, numlen + 1);
1266 strlcpy(numstr, &str[start], numlen + 1);
1267 lst = CONS(MAKE_STR(numstr), lst);
1268 }
1269 free(numstr);
1270
1271 return uim_scm_callf("reverse", "o", lst);
1272 }
1273
1274 static char *wide_num_list[] =
1275 {"��", "��", "��", "��", "��", "��", "��", "��", "��", "��"};
1276 static char *kanji_num_list[] =
1277 {"��", "��", "��", "��", "��", "��", "ϻ", "��", "Ȭ", "��"};
1278 static char *kanji_num_position_list[] =
1279 {NULL, "��", "ɴ", "��", "��", NULL, NULL, NULL, "��", NULL,
1280 NULL, NULL, "��", NULL, NULL, NULL, "��", NULL, NULL, NULL};
1281 static char *kanji_check_num_list[] =
1282 {"��", "��", "б", "��", "��", "��", "ϻ", "��", "Ȭ", "��"};
1283 static char *kanji_check_num_position_list[] =
1284 {NULL, "��", "ɴ", "��", "��", NULL, NULL, NULL, "��", NULL,
1285 NULL, NULL, "��", NULL, NULL, NULL, "��", NULL, NULL, NULL};
1286
1287 static char *
numeric_wide_or_kanji_conv(const char * numstr,int method)1288 numeric_wide_or_kanji_conv(const char *numstr, int method)
1289 {
1290 char *mbstr;
1291 int i, len;
1292
1293 len = strlen(numstr);
1294 mbstr = uim_malloc(len * 2 + 1);
1295
1296 for (i = 0; i < len; i++) {
1297 if (method == 1)
1298 strcpy(&mbstr[i * 2], wide_num_list[numstr[i] - '0']);
1299 else
1300 strcpy(&mbstr[i * 2], kanji_num_list[numstr[i] - '0']);
1301 }
1302 mbstr[len * 2] = '\0';
1303
1304 return mbstr;
1305 }
1306
/*
 * Convert a digit string into kanji numerals with positional markers,
 * using kanji_num_list for the digits and kanji_num_position_list for
 * the markers.
 *
 * Inputs longer than 20 characters are returned as a plain copy.
 * numstr is indexed as numstr[j] - '0', so it must consist of the
 * characters '0'..'9' only.  Returns a malloc()ed string.
 *
 * Bookkeeping used below:
 *   j     - index of the input digit being processed
 *   i     - index of the current 2-byte output slot (&mbstr[i * 2])
 *   mblen - byte length the output will have; the terminator is stored
 *           at mbstr[mblen] at the very end
 */
static char *
numeric_kanji_with_position_conv(const char *numstr)
{
  char *mbstr;
  int i, j, len, mblen;
  int position;
  int head_is_zero = 0;

  len = strlen(numstr);
  if (len > 20) /* too big number */
    return uim_strdup(numstr);

  mbstr = uim_malloc(len * 2 + 1);
  mblen = len * 2;

  for (i = 0, j = 0; j < len; i++, j++) {
    /* decimal place of this digit, 0 being the last digit */
    position = len - j - 1;
    if (numstr[j] == '0') {
      /* a zero produces no digit glyph: give its output slot back */
      i--;
      mblen -= 2;
      /* check zero at the head */
      if (j == 0) {
        head_is_zero = 1;
      } else {
        /*
         * A zero sitting on a 4-digit group boundary (position 4, 8, 12
         * or 16) still gets the group marker, unless every preceding
         * digit of the same group (up to three of them) is zero as well.
         */
        if ((position >= 4) && ((position % 4) == 0) && !head_is_zero) {
          int use_position = 0;
          if (j >= 3) {
            if (!((numstr[j - 1] == '0') && (numstr[j - 2] == '0') &&
                  (numstr[j - 3] == '0')))
              use_position = 1;
          } else if (j == 2) {
            if (!((numstr[j - 1] == '0') && (numstr[j - 2] == '0')))
              use_position = 1;
          } else if (j == 1) {
            if (!(numstr[j - 1] == '0'))
              use_position = 1;
          }
          if (use_position) {
            i++;
            mblen += 2;
            /* grow only when the output outgrows the initial allocation */
            if (mblen > len * 2)
              mbstr = uim_realloc(mbstr, mblen + 2);
            strcpy(&mbstr[i * 2], kanji_num_position_list[position]);
          }
        }
      }
    } else {
      /* a non-zero digit ends the "leading zero" state */
      if (head_is_zero == 1)
        head_is_zero = 0;

      /* replace numstr[j] with kanji number */
      if (numstr[j] == '1') {
        /*
         * The glyph for "one" is written out only when the digit sits on
         * a group boundary (position % 4 == 0), or when it is the top
         * digit of a group (position >= 7, position % 4 == 3) and the
         * three digits after it are all zero.  Otherwise the slot is
         * given back and only the positional marker is emitted.
         */
        if (((position % 4) == 0) ||
            ((position >= 7) &&
             ((position % 4) == 3) &&
             (numstr[j + 1] == '0') &&
             (numstr[j + 2] == '0') &&
             (numstr[j + 3] == '0'))) {
          strcpy(&mbstr[i * 2], kanji_num_list[1]);
        } else {
          i--;
          mblen -= 2;
        }
      } else {
        strcpy(&mbstr[i * 2], kanji_num_list[numstr[j] - '0']);
      }

      /*
       * Above position 4 the in-group marker (table index 1, 2 or 3) is
       * looked up with position % 4.
       */
      if (position > 4) {
        if ((position % 4) != 0) {
          i++;
          mblen += 2;
          if (mblen > len * 2)
            mbstr = uim_realloc(mbstr, mblen + 2);
          strcpy(&mbstr[i * 2], kanji_num_position_list[position % 4]);
        }
      }

      /* add the marker the table defines for this exact position, if any */
      if (kanji_num_position_list[position]) {
        i++;
        mblen += 2;
        if (mblen > len * 2)
          mbstr = uim_realloc(mbstr, mblen + 2);
        strcpy(&mbstr[i * 2], kanji_num_position_list[position]);
      }
    }
  }

  /*
   * head_is_zero is still set only when no non-zero digit was seen:
   * the result is then the single glyph for zero.
   */
  if (head_is_zero) {
    strcpy(&mbstr[0], kanji_num_list[0]);
    mblen = 2;
  }

  mbstr[mblen] = '\0';
  return mbstr;
}
1410
/*
 * Convert a digit string into kanji numerals with positional markers,
 * using the kanji_check_num_list / kanji_check_num_position_list tables.
 * Unlike numeric_kanji_with_position_conv(), the digit '1' gets no
 * special treatment: every non-zero digit is written out.
 *
 * Inputs longer than 20 characters are returned as a plain copy.
 * numstr is indexed as numstr[j] - '0', so it must consist of the
 * characters '0'..'9' only.  Returns a malloc()ed string.
 *
 * Bookkeeping used below:
 *   j     - index of the input digit being processed
 *   i     - index of the current 2-byte output slot (&mbstr[i * 2])
 *   mblen - byte length the output will have; the terminator is stored
 *           at mbstr[mblen] at the very end
 */
static char *
numeric_kanji_for_check_conv(const char *numstr)
{
  char *mbstr;
  int i, j, len, mblen;
  int position;
  int head_is_zero = 0;

  len = strlen(numstr);
  if (len > 20) /* too big number */
    return uim_strdup(numstr);

  mbstr = uim_malloc(len * 2 + 1);
  mblen = len * 2;

  for (i = 0, j = 0; j < len; i++, j++) {
    /* decimal place of this digit, 0 being the last digit */
    position = len - j - 1;
    if (numstr[j] == '0') {
      /* a zero produces no digit glyph: give its output slot back */
      i--;
      mblen -= 2;
      /* check zero at the head */
      if (j == 0) {
        head_is_zero = 1;
      } else {
        /*
         * A zero sitting on a 4-digit group boundary (position 4, 8, 12
         * or 16) still gets the group marker, unless every preceding
         * digit of the same group (up to three of them) is zero as well.
         */
        if ((position >= 4) && ((position % 4) == 0) && !head_is_zero) {
          int use_position = 0;
          if (j >= 3) {
            if (!((numstr[j - 1] == '0') && (numstr[j - 2] == '0') &&
                  (numstr[j - 3] == '0')))
              use_position = 1;
          } else if (j == 2) {
            if (!((numstr[j - 1] == '0') && (numstr[j - 2] == '0')))
              use_position = 1;
          } else if (j == 1) {
            if (!((numstr[j - 1] == '0')))
              use_position = 1;
          }
          if (use_position) {
            i++;
            mblen += 2;
            /* grow only when the output outgrows the initial allocation */
            if (mblen > len * 2)
              mbstr = uim_realloc(mbstr, mblen + 2);
            strcpy(&mbstr[i * 2], kanji_check_num_position_list[position]);
          }
        }
      }
    } else {
      /* a non-zero digit ends the "leading zero" state */
      if (head_is_zero == 1)
        head_is_zero = 0;

      /* replace numstr[j] with kanji number */
      strcpy(&mbstr[i * 2], kanji_check_num_list[numstr[j] - '0']);

      /*
       * Above position 4 the in-group marker (table index 1, 2 or 3) is
       * looked up with position % 4.
       */
      if (position > 4) {
        if ((position % 4) != 0) {
          i++;
          mblen += 2;
          if (mblen > len * 2)
            mbstr = uim_realloc(mbstr, mblen + 2);
          strcpy(&mbstr[i * 2], kanji_check_num_position_list[position % 4]);
        }
      }

      /* add the marker the table defines for this exact position, if any */
      if (kanji_check_num_position_list[position]) {
        i++;
        mblen += 2;
        if (mblen > len * 2)
          mbstr = uim_realloc(mbstr, mblen + 2);
        strcpy(&mbstr[i * 2], kanji_check_num_position_list[position]);
      }
    }
  }

  /*
   * head_is_zero is still set only when no non-zero digit was seen:
   * the result is then the single glyph for zero.
   */
  if (head_is_zero) {
    strcpy(&mbstr[0], kanji_check_num_list[0]);
    mblen = 2;
  }

  mbstr[mblen] = '\0';
  return mbstr;
}
1496
1497 static char *
numeric_shogi_conv(const char * numstr)1498 numeric_shogi_conv(const char *numstr)
1499 {
1500 char *mbstr;
1501 int len;
1502
1503 len = strlen(numstr);
1504 if (len != 2) /* allow two digit number only */
1505 return uim_strdup(numstr);
1506
1507 mbstr = uim_malloc(5);
1508 strcpy(&mbstr[0], wide_num_list[numstr[0] - '0']);
1509 strcpy(&mbstr[2], kanji_num_list[numstr[1] - '0']);
1510 mbstr[4] = '\0';
1511
1512 return mbstr;
1513 }
1514
/*
 * Convert the digit string numstr according to the numeric conversion
 * method number and return the result as a malloc()ed string.
 *
 * Method #4 never reaches this function; it is already handled in
 * skk_get_nth_candidate().
 */
static char *
numeric_convert(const char *numstr, int method)
{
  switch (method) {
  case 1: /* per-digit glyphs from wide_num_list */
  case 2: /* per-digit glyphs from kanji_num_list, no positional markers */
    return numeric_wide_or_kanji_conv(numstr, method);
  case 3: /* kanji numerals with positional markers */
    return numeric_kanji_with_position_conv(numstr);
  case 5: /* kanji numerals from the "check" tables */
    return numeric_kanji_for_check_conv(numstr);
  case 9: /* two-digit "shogi" form */
    return numeric_shogi_conv(numstr);
  default:
    /* method 0 and every unknown method: keep the digits as they are */
    return uim_strdup(numstr);
  }
}
1547
1548 static uim_lisp
skk_merge_replaced_numeric_str(uim_lisp str_,uim_lisp numlst_)1549 skk_merge_replaced_numeric_str(uim_lisp str_, uim_lisp numlst_)
1550 {
1551 char *str;
1552 int i, j, len, newlen;
1553 int method;
1554 int convlen;
1555 const char *numstr;
1556 char *convstr;
1557
1558 if (str_ == uim_scm_null())
1559 return uim_scm_null();
1560
1561 str = C_STR(str_);
1562 len = strlen(str);
1563 newlen = len;
1564
1565 for (i = 0, j = 0; j < len; i++, j++) {
1566 if (str[i] == '#') {
1567 method = str[i + 1] - '0';
1568 if (NULLP(numlst_))
1569 break;
1570
1571 numstr = REFER_C_STR(CAR(numlst_));
1572
1573 convstr = numeric_convert(numstr, method);
1574 convlen = strlen(convstr);
1575
1576 newlen = newlen - 2 + convlen;
1577 str = uim_realloc(str, newlen + 1);
1578 memmove(&str[i + convlen], &str[i + 2], newlen - i - convlen + 1);
1579 memcpy(&str[i], convstr, convlen);
1580 i = i - 2 + convlen;
1581
1582 numlst_ = CDR(numlst_);
1583 }
1584 }
1585
1586 return MAKE_STR_DIRECTLY(str);
1587 }
1588
/*
 * Return a copy of str (obtained from uim_strdup()) in which every run
 * of consecutive digits is collapsed into a single '#'.  All other
 * characters are kept unchanged.
 */
static char *
replace_numeric(const char *str)
{
  char *newstr = uim_strdup(str);
  char *src, *dst;
  int in_digits = 0;

  /* compact in place: dst never gets ahead of src */
  for (src = dst = newstr; *src != '\0'; src++) {
    if (isdigit((unsigned char)*src)) {
      /* only the first digit of a run leaves a mark */
      if (!in_digits)
        *dst++ = '#';
      in_digits = 1;
    } else {
      *dst++ = *src;
      in_digits = 0;
    }
  }
  *dst = '\0';

  return newstr;
}
1615
1616 static uim_lisp
skk_replace_numeric(uim_lisp head_)1617 skk_replace_numeric(uim_lisp head_)
1618 {
1619 char *str;
1620
1621 str = replace_numeric(REFER_C_STR(head_));
1622
1623 return MAKE_STR_DIRECTLY(str);
1624 }
1625
/*
 * Locate the first "#4" marker in cand.
 *
 * Returns a pointer to the marker, or NULL when cand has none.  When a
 * marker is found, *nth is incremented once for every "#<digit>" marker
 * from the start of cand up to and including that "#4"; *nth is left
 * untouched otherwise.  The caller is responsible for the initial value
 * of *nth.
 */
static char *
find_numeric_conv_method4_mark(const char *cand, int *nth)
{
  char *mark = strstr(cand, "#4");
  const char *scan;

  if (!mark)
    return NULL;

  for (scan = cand; scan <= mark; scan++) {
    if (scan[0] == '#' && isdigit((unsigned char)scan[1]))
      (*nth)++;
  }
  return mark;
}
1646
1647 static int
has_numeric_in_head(uim_lisp head_)1648 has_numeric_in_head(uim_lisp head_)
1649 {
1650 const char *str;
1651 int i = 0;
1652
1653 str = REFER_C_STR(head_);
1654
1655 while (str[i] != '\0') {
1656 if (isdigit((unsigned char)str[i]))
1657 return 1;
1658 i++;
1659 }
1660
1661 return 0;
1662 }
1663
1664 static uim_lisp
get_nth(int nth,uim_lisp lst_)1665 get_nth(int nth, uim_lisp lst_)
1666 {
1667 int i;
1668 /* nth start from 1 */
1669 for (i = 1; i < nth; i++) {
1670 if (NULLP(lst_)) {
1671 return uim_scm_null();
1672 }
1673 lst_ = CDR(lst_);
1674 }
1675 return CAR(lst_);
1676 }
1677
1678 static int
get_purged_cand_index(struct skk_cand_array * ca)1679 get_purged_cand_index(struct skk_cand_array *ca)
1680 {
1681 int i, n = -1;
1682
1683 if (!ca)
1684 return -1;
1685
1686 for (i = 0; i < ca->nr_real_cands; i++) {
1687 if (is_purged_cand(ca->cands[i])) {
1688 n = i;
1689 break;
1690 }
1691 }
1692 return n;
1693 }
1694
1695 static int
get_ignoring_indices(struct skk_cand_array * ca,int indices[])1696 get_ignoring_indices(struct skk_cand_array *ca, int indices[])
1697 {
1698 int i, j, k = 0;
1699 int purged_cand_index;
1700
1701 purged_cand_index= get_purged_cand_index(ca);
1702
1703 if (purged_cand_index != -1) {
1704 char **purged_words = get_purged_words(ca->cands[purged_cand_index]);
1705 int nr_purged = nr_purged_words(purged_words);
1706
1707 indices[k] = purged_cand_index;
1708 k++;
1709
1710 for (i = ca->nr_real_cands; i < ca->nr_cands; i++) {
1711 if (k >= IGNORING_WORD_MAX)
1712 break;
1713 for (j = 0; j < nr_purged; j++) {
1714 if (!strcmp(ca->cands[i], purged_words[j])) {
1715 indices[k] = i;
1716 k++;
1717 }
1718 }
1719 }
1720 indices[k] = -1;
1721 free_allocated_purged_words(purged_words);
1722 } else {
1723 indices[0] = -1;
1724 }
1725 return k;
1726 }
1727
1728 static uim_lisp
skk_get_nth_candidate(uim_lisp skk_dic_,uim_lisp nth_,uim_lisp head_and_okuri_head_,uim_lisp okuri_,uim_lisp numeric_conv_)1729 skk_get_nth_candidate(uim_lisp skk_dic_, uim_lisp nth_,
1730 uim_lisp head_and_okuri_head_,
1731 uim_lisp okuri_,
1732 uim_lisp numeric_conv_)
1733 {
1734 int n;
1735 struct skk_cand_array *ca, *subca;
1736 int i, j, k = 0;
1737 const char *cands = NULL;
1738 char *p;
1739 const char *numstr;
1740 int method_place = 0;
1741 int sublen, newlen;
1742 int mark;
1743 uim_lisp str_ = uim_scm_null();
1744 uim_lisp numlst_ = uim_scm_null();
1745 int ignoring_indices[IGNORING_WORD_MAX + 1];
1746 dic_info *skk_dic = NULL;
1747 uim_lisp head_ = CAR(head_and_okuri_head_);
1748 uim_lisp okuri_head_ = CDR(head_and_okuri_head_);
1749
1750 if (PTRP(skk_dic_))
1751 skk_dic = C_PTR(skk_dic_);
1752
1753 if (TRUEP(numeric_conv_))
1754 numlst_ = skk_store_replaced_numeric_str(head_);
1755
1756 n = C_INT(nth_);
1757
1758 if (!NULLP(numlst_))
1759 ca = find_cand_array_lisp(skk_dic, head_, okuri_head_, okuri_, 0, numeric_conv_);
1760 else
1761 ca = find_cand_array_lisp(skk_dic, head_, okuri_head_, okuri_, 0, uim_scm_f());
1762
1763 get_ignoring_indices(ca, ignoring_indices);
1764
1765 if (ca) {
1766 /* handle #4 method of numeric conversion */
1767 if (!NULLP(numlst_)) {
1768 for (i = 0; i < ca->nr_cands; i++) {
1769 if (match_to_discarding_index(ignoring_indices, i))
1770 continue;
1771
1772 if ((p = find_numeric_conv_method4_mark(ca->cands[i], &method_place))) {
1773 numstr = REFER_C_STR(get_nth(method_place, numlst_));
1774 subca = find_cand_array(skk_dic, numstr, 0, NULL, 0);
1775 if (subca) {
1776 for (j = 0; j < subca->nr_cands; j++) {
1777 if (k == n) {
1778 char *str;
1779 str = uim_strdup(ca->cands[i]);
1780 sublen = strlen(subca->cands[j]);
1781 newlen = strlen(ca->cands[i]) - 2 + sublen;
1782 mark = p - ca->cands[i];
1783
1784 str = uim_realloc(str, newlen + 1);
1785 memmove(&str[mark + sublen],
1786 &str[mark + 2],
1787 newlen - mark - sublen + 1);
1788 memcpy(&str[mark], subca->cands[j], sublen);
1789
1790 str_ = MAKE_STR_DIRECTLY(str);
1791 return skk_merge_replaced_numeric_str(str_, numlst_);
1792 }
1793 k++;
1794 }
1795 }
1796 } else {
1797 if (k == n) {
1798 cands = ca->cands[i];
1799 break;
1800 }
1801 k++;
1802 }
1803 }
1804 } else {
1805 for (i = 0; i < ca->nr_cands; i++) {
1806 if (match_to_discarding_index(ignoring_indices, i))
1807 continue;
1808 if (k == n) {
1809 cands = ca->cands[i];
1810 break;
1811 }
1812 k++;
1813 }
1814 }
1815 }
1816
1817 /* check non-numeric conversion */
1818 if (!cands && n >= k && !NULLP(numlst_))
1819 return skk_get_nth_candidate(skk_dic_, MAKE_INT(n - k),
1820 head_and_okuri_head_, okuri_, uim_scm_f());
1821
1822 if (cands)
1823 str_ = MAKE_STR(cands);
1824
1825 if (!NULLP(numlst_))
1826 return skk_merge_replaced_numeric_str(str_, numlst_);
1827 else
1828 return str_;
1829 }
1830
1831 static uim_lisp
skk_get_nr_candidates(uim_lisp skk_dic_,uim_lisp head_,uim_lisp okuri_head_,uim_lisp okuri_,uim_lisp numeric_conv_)1832 skk_get_nr_candidates(uim_lisp skk_dic_, uim_lisp head_, uim_lisp okuri_head_, uim_lisp okuri_, uim_lisp numeric_conv_)
1833 {
1834 struct skk_cand_array *ca, *subca;
1835 int n = 0;
1836 int i, nr_cands = 0;
1837 const char *numstr;
1838 int method_place = 0;
1839 uim_lisp numlst_ = uim_scm_null();
1840 int ignoring_indices[IGNORING_WORD_MAX + 1];
1841 dic_info *skk_dic = NULL;
1842
1843 if (PTRP(skk_dic_))
1844 skk_dic = C_PTR(skk_dic_);
1845
1846 if (TRUEP(numeric_conv_))
1847 numlst_ = skk_store_replaced_numeric_str(head_);
1848
1849 if (!NULLP(numlst_))
1850 ca = find_cand_array_lisp(skk_dic, head_, okuri_head_, okuri_, 0, numeric_conv_);
1851 else
1852 ca = find_cand_array_lisp(skk_dic, head_, okuri_head_, okuri_, 0, uim_scm_f());
1853
1854 if (ca)
1855 n = ca->nr_cands;
1856 nr_cands = n;
1857 nr_cands -= get_ignoring_indices(ca, ignoring_indices);
1858
1859 /* handle #4 method of numeric conversion */
1860 if (!NULLP(numlst_)) {
1861 for (i = 0; i < n; i++) {
1862 if (match_to_discarding_index(ignoring_indices, i))
1863 continue;
1864
1865 if (find_numeric_conv_method4_mark(ca->cands[i], &method_place)) {
1866 numstr = REFER_C_STR(get_nth(method_place, numlst_));
1867 nr_cands--;
1868 subca = find_cand_array(skk_dic, numstr, 0, NULL, 0);
1869 if (subca)
1870 nr_cands += subca->nr_cands;
1871 break;
1872 }
1873 }
1874 }
1875
1876 /* add non-numeric conversion */
1877 if (!NULLP(numlst_))
1878 return MAKE_INT(nr_cands +
1879 C_INT(skk_get_nr_candidates(skk_dic_, head_, okuri_head_,
1880 okuri_, uim_scm_f())));
1881
1882 return MAKE_INT(nr_cands);
1883 }
1884
1885 static struct skk_comp_array *
make_comp_array_from_cache(dic_info * di,const char * s,uim_lisp use_look_)1886 make_comp_array_from_cache(dic_info *di, const char *s, uim_lisp use_look_)
1887 {
1888 struct skk_line *sl;
1889 struct skk_comp_array *ca;
1890
1891 if (!di)
1892 return NULL;
1893
1894 ca = uim_malloc(sizeof(struct skk_comp_array));
1895 ca->nr_comps = 0;
1896 ca->refcount = 0;
1897 ca->comps = NULL;
1898 ca->head = NULL;
1899 ca->next = NULL;
1900
1901 /* search from cache */
1902 for (sl = di->head.next; sl; sl = sl->next) {
1903 if (/* string 's' is part of sl->head */
1904 !strncmp(sl->head, s, strlen(s)) && strcmp(sl->head, s) &&
1905 /* and sl is okuri-nasi line */
1906 sl->okuri_head == '\0' &&
1907 /* exclude some entries */
1908 sl->state & SKK_LINE_USE_FOR_COMPLETION) {
1909 ca->nr_comps++;
1910 ca->comps = uim_realloc(ca->comps, sizeof(char *) * ca->nr_comps);
1911 ca->comps[ca->nr_comps - 1] = uim_strdup(sl->head);
1912 }
1913 }
1914
1915 if (TRUEP(use_look_))
1916 look_get_comp(ca, s);
1917
1918 if (ca->nr_comps == 0) {
1919 free(ca);
1920 ca = NULL;
1921 } else {
1922 ca->head = uim_strdup(s);
1923 ca->next = skk_comp;
1924 skk_comp = ca;
1925 }
1926 return ca;
1927 }
1928
1929 static struct skk_comp_array *
append_comp_array_from_server(struct skk_comp_array * ca,dic_info * di,const char * s,uim_lisp use_look_)1930 append_comp_array_from_server(struct skk_comp_array *ca, dic_info *di, const char *s, uim_lisp use_look_)
1931 {
1932 char r;
1933 struct skk_line *sl;
1934 int n = 0, ret, len;
1935 int i;
1936 char buf[SKK_SERV_BUFSIZ];
1937 char *line;
1938 ssize_t nr;
1939 struct pollfd pfd[1];
1940
1941 if (!di) {
1942 return ca;
1943 }
1944 if (!(di->skkserv_state & SKK_SERV_CONNECTED)) {
1945 if (!((di->skkserv_state |= open_skkserv(di->skkserv_hostname,
1946 di->skkserv_portnum,
1947 di->skkserv_family)) &
1948 SKK_SERV_CONNECTED))
1949 return ca;
1950 }
1951
1952 fprintf(wserv, "4%s \n", s);
1953 ret = fflush(wserv);
1954 if (ret != 0 && errno == EPIPE) {
1955 skkserv_disconnected(di);
1956 return ca;
1957 }
1958
1959 /* check server response to see the capability of completion */
1960 pfd[0].fd = skkservsock;
1961 pfd[0].events = POLLIN;
1962 ret = poll(pfd, 1, di->skkserv_completion_timeout);
1963 if (ret == -1) {
1964 skkserv_disconnected(di);
1965 return ca;
1966 } else if (ret == 0) {
1967 uim_notify_info(N_("SKK server without completion capability\n"));
1968 /* don't try server completion further any more */
1969 di->skkserv_state &= ~SKK_SERV_TRY_COMPLETION;
1970 return ca;
1971 }
1972
1973 if ((nr = read(skkservsock, &r, 1)) == -1 || nr == 0) {
1974 skkserv_disconnected(di);
1975 return ca;
1976 }
1977
1978 if (r == '1') {
1979 char sep = '\0';
1980 uim_asprintf(&line, "%s ", s);
1981 while (1) {
1982 if ((nr = read(skkservsock, &r, 1)) == -1 || nr == 0) {
1983 skkserv_disconnected(di);
1984 free(line);
1985 return ca;
1986 }
1987
1988 if (r == '\n') {
1989 len = strlen(line) + n;
1990 line = uim_realloc(line, len + 1);
1991 strlcat(line, buf, len + 1);
1992 break;
1993 }
1994
1995 /* FIXME: should handle word with '/' properly */
1996 if (n == 0 && sep == '\0') {
1997 sep = r;
1998 } else {
1999 if (sep == ' ' && r == ' ') {
2000 r = '/';
2001 }
2002 }
2003
2004 buf[n] = r;
2005 buf[n + 1] = '\0';
2006 if (n == SKK_SERV_BUFSIZ - 2) {
2007 len = strlen(line) + n + 1;
2008 line = uim_realloc(line, len + 1);
2009 strlcat(line, buf, len + 1);
2010 n = 0;
2011 } else {
2012 n++;
2013 }
2014 }
2015 sl = compose_line(di, s, '\0', line);
2016 free(line);
2017
2018 if (!ca) {
2019 ca = uim_malloc(sizeof(struct skk_comp_array));
2020 ca->nr_comps = 0;
2021 ca->refcount = 0;
2022 ca->comps = NULL;
2023 ca->head = NULL;
2024 ca->next = NULL;
2025 }
2026 for (i = 0; i < sl->cands[0].nr_cands; i++) {
2027 if (strcmp(s, sl->cands[0].cands[i]) != 0) {
2028 ca->nr_comps++;
2029 ca->comps = uim_realloc(ca->comps, sizeof(char *) * ca->nr_comps);
2030 ca->comps[ca->nr_comps - 1] = uim_strdup(sl->cands[0].cands[i]);
2031 }
2032 }
2033 free_skk_line(sl);
2034 if (ca->nr_comps == 0) {
2035 free(ca);
2036 ca = NULL;
2037 } else if (ca->head == NULL) {
2038 ca->head = uim_strdup(s);
2039 ca->next = skk_comp;
2040 skk_comp = ca;
2041 }
2042 } else {
2043 while ((nr = read(skkservsock, &r, 1)) != -1 && nr != 0 && r != '\n');
2044 }
2045
2046 return ca;
2047 }
2048
2049 static struct skk_comp_array *
find_comp_array(dic_info * di,const char * s,uim_lisp use_look_)2050 find_comp_array(dic_info *di, const char *s, uim_lisp use_look_)
2051 {
2052 struct skk_comp_array *ca;
2053
2054 if (strlen(s) == 0)
2055 return NULL;
2056
2057 for (ca = skk_comp; ca; ca = ca->next) {
2058 if (!strcmp(ca->head, s))
2059 break;
2060 }
2061 if (ca == NULL) {
2062 ca = make_comp_array_from_cache(di, s, use_look_);
2063 if (di->skkserv_state & SKK_SERV_TRY_COMPLETION)
2064 ca = append_comp_array_from_server(ca, di, s, use_look_);
2065 }
2066
2067 return ca;
2068 }
2069
2070 static struct skk_comp_array *
find_comp_array_lisp(dic_info * skk_dic,uim_lisp head_,uim_lisp numeric_conv_,uim_lisp use_look_)2071 find_comp_array_lisp(dic_info *skk_dic, uim_lisp head_, uim_lisp numeric_conv_, uim_lisp use_look_)
2072 {
2073 const char *hs;
2074 struct skk_comp_array *ca;
2075 char *rs = NULL;
2076
2077 hs = REFER_C_STR(head_);
2078
2079 if (TRUEP(numeric_conv_))
2080 rs = replace_numeric(hs);
2081
2082 if (!rs)
2083 ca = find_comp_array(skk_dic, hs, use_look_);
2084 else {
2085 ca = find_comp_array(skk_dic, rs, use_look_);
2086 free(rs);
2087 }
2088 return ca;
2089 }
2090
2091 static uim_lisp
skk_get_completion(uim_lisp skk_dic_,uim_lisp head_,uim_lisp numeric_conv_,uim_lisp use_look_)2092 skk_get_completion(uim_lisp skk_dic_, uim_lisp head_, uim_lisp numeric_conv_, uim_lisp use_look_)
2093 {
2094 struct skk_comp_array *ca;
2095 dic_info *skk_dic = NULL;
2096
2097 if (PTRP(skk_dic_))
2098 skk_dic = C_PTR(skk_dic_);
2099
2100 ca = find_comp_array_lisp(skk_dic, head_, numeric_conv_, use_look_);
2101 if (ca) {
2102 ca->refcount++;
2103 return uim_scm_t();
2104 }
2105
2106 if (TRUEP(numeric_conv_) && has_numeric_in_head(head_))
2107 return skk_get_completion(skk_dic_, head_, uim_scm_f(), use_look_);
2108
2109 return uim_scm_f();
2110 }
2111
2112 static uim_lisp
skk_get_nth_completion(uim_lisp skk_dic_,uim_lisp nth_,uim_lisp head_,uim_lisp numeric_conv_,uim_lisp use_look_)2113 skk_get_nth_completion(uim_lisp skk_dic_, uim_lisp nth_, uim_lisp head_,
2114 uim_lisp numeric_conv_, uim_lisp use_look_)
2115 {
2116 int n;
2117 struct skk_comp_array *ca;
2118 char *str;
2119 uim_lisp numlst_ = uim_scm_null();
2120 dic_info *skk_dic = NULL;
2121
2122 if (PTRP(skk_dic_))
2123 skk_dic = C_PTR(skk_dic_);
2124
2125 if (TRUEP(numeric_conv_))
2126 numlst_ = skk_store_replaced_numeric_str(head_);
2127
2128 if (!NULLP(numlst_))
2129 ca = find_comp_array_lisp(skk_dic, head_, numeric_conv_, use_look_);
2130 else
2131 ca = find_comp_array_lisp(skk_dic, head_, uim_scm_f(), use_look_);
2132
2133 if (!ca) {
2134 if (!NULLP(numlst_))
2135 return skk_get_nth_completion(skk_dic_, nth_, head_, uim_scm_f(), use_look_);
2136 else
2137 return MAKE_STR("");
2138 }
2139
2140 n = C_INT(nth_);
2141 if (ca->nr_comps > n) {
2142 str = ca->comps[n];
2143 if (!NULLP(numlst_))
2144 return restore_numeric(str, numlst_);
2145 else
2146 return MAKE_STR(str);
2147 }
2148
2149 if (!NULLP(numlst_) && n >= ca->nr_comps)
2150 return skk_get_nth_completion(skk_dic_, MAKE_INT(n - ca->nr_comps),
2151 head_, uim_scm_f(), use_look_);
2152
2153 return MAKE_STR("");
2154 }
2155
2156 static uim_lisp
skk_get_nr_completions(uim_lisp skk_dic_,uim_lisp head_,uim_lisp numeric_conv_,uim_lisp use_look_)2157 skk_get_nr_completions(uim_lisp skk_dic_, uim_lisp head_, uim_lisp numeric_conv_, uim_lisp use_look_)
2158 {
2159 int n = 0;
2160 struct skk_comp_array *ca;
2161 dic_info *skk_dic = NULL;
2162
2163 if (PTRP(skk_dic_))
2164 skk_dic = C_PTR(skk_dic_);
2165
2166 ca = find_comp_array_lisp(skk_dic, head_, numeric_conv_, use_look_);
2167 if (ca)
2168 n = ca->nr_comps;
2169
2170 if (TRUEP(numeric_conv_) && has_numeric_in_head(head_))
2171 return MAKE_INT(n +
2172 C_INT(skk_get_nr_completions(skk_dic_, head_, uim_scm_f(), use_look_)));
2173
2174 return MAKE_INT(n);
2175 }
2176
2177 static uim_lisp
skk_clear_completions(uim_lisp head_,uim_lisp numeric_conv_)2178 skk_clear_completions(uim_lisp head_, uim_lisp numeric_conv_)
2179 {
2180 int i;
2181 struct skk_comp_array *ca, *ca_prev;
2182 const char *hs;
2183 char *rs = NULL;
2184
2185 hs = REFER_C_STR(head_);
2186
2187 if (TRUEP(numeric_conv_))
2188 rs = replace_numeric(hs);
2189
2190 if (!rs)
2191 for (ca = skk_comp; ca; ca = ca->next) {
2192 if (!strcmp(ca->head, hs)) {
2193 ca->refcount--;
2194 break;
2195 }
2196 }
2197 else {
2198 for (ca = skk_comp; ca; ca = ca->next) {
2199 if (!strcmp(ca->head, rs)) {
2200 ca->refcount--;
2201 break;
2202 }
2203 }
2204 free(rs);
2205 }
2206
2207 if (ca && ca->refcount == 0) {
2208 for (i = 0; i < ca->nr_comps; i++) {
2209 free(ca->comps[i]);
2210 }
2211 free(ca->comps);
2212 free(ca->head);
2213
2214 if (ca == skk_comp) {
2215 skk_comp = ca->next;
2216 free(ca);
2217 } else {
2218 ca_prev = skk_comp;
2219 while (ca_prev->next != ca) {
2220 ca_prev = ca_prev->next;
2221 }
2222 ca_prev->next = ca->next;
2223 free(ca);
2224 }
2225 }
2226
2227 if (TRUEP(numeric_conv_) && has_numeric_in_head(head_))
2228 skk_clear_completions(head_, uim_scm_f());
2229
2230 return uim_scm_t();
2231 }
2232
2233 static uim_lisp
restore_numeric(const char * s,uim_lisp numlst_)2234 restore_numeric(const char *s, uim_lisp numlst_)
2235 {
2236 int i, j, len, newlen, numstrlen;
2237 const char *numstr;
2238 char *str;
2239
2240 str = uim_strdup(s);
2241 newlen = len = strlen(str);
2242
2243 for (i = 0, j = 0; j < len; i++, j++) {
2244 if (str[i] == '#') {
2245 if (NULLP(numlst_))
2246 break;
2247
2248 numstr = REFER_C_STR(CAR(numlst_));
2249 numstrlen = strlen(numstr);
2250 newlen = newlen - 1 + numstrlen;
2251 str = uim_realloc(str, newlen + 1);
2252 memmove(&str[i + numstrlen], &str[i + 1], newlen - i - numstrlen + 1);
2253 memcpy(&str[i], numstr, numstrlen);
2254 i = i - 1 + numstrlen;
2255
2256 numlst_ = CDR(numlst_);
2257 }
2258 }
2259 return MAKE_STR_DIRECTLY(str);
2260 }
2261
2262 static uim_lisp
skk_get_dcomp_word(uim_lisp skk_dic_,uim_lisp head_,uim_lisp numeric_conv_,uim_lisp use_look_)2263 skk_get_dcomp_word(uim_lisp skk_dic_, uim_lisp head_, uim_lisp numeric_conv_, uim_lisp use_look_)
2264 {
2265 const char *hs;
2266 struct skk_line *sl;
2267 int len;
2268 uim_lisp numlst_, look_;
2269 char *rs = NULL;
2270 dic_info *skk_dic = NULL;
2271
2272 if (PTRP(skk_dic_))
2273 skk_dic = C_PTR(skk_dic_);
2274
2275 numlst_ = uim_scm_null();
2276 hs = REFER_C_STR(head_);
2277
2278 if (TRUEP(numeric_conv_))
2279 numlst_ = skk_store_replaced_numeric_str(head_);
2280
2281 if (!NULLP(numlst_)) {
2282 rs = replace_numeric(hs);
2283 len = strlen(rs);
2284 } else
2285 len = strlen(hs);
2286
2287 if (len != 0) {
2288 /* Search from cache using same way as in make_comp_array_from_cache(). */
2289 if (!rs) {
2290 for (sl = skk_dic->head.next; sl; sl = sl->next) {
2291 if (!strncmp(sl->head, hs, len) && strcmp(sl->head, hs) &&
2292 sl->okuri_head == '\0' &&
2293 sl->state & SKK_LINE_USE_FOR_COMPLETION)
2294 return MAKE_STR(sl->head);
2295 }
2296 if (TRUEP(use_look_)) {
2297 look_ = look_get_top_word(hs);
2298 if (TRUEP(look_))
2299 return look_;
2300 }
2301 } else {
2302 for (sl = skk_dic->head.next; sl; sl = sl->next) {
2303 if (!strncmp(sl->head, rs, len) && strcmp(sl->head, rs) &&
2304 sl->okuri_head == '\0' &&
2305 sl->state & SKK_LINE_USE_FOR_COMPLETION) {
2306 free(rs);
2307 return restore_numeric(sl->head, numlst_);
2308 }
2309 }
2310 if (TRUEP(use_look_)) {
2311 look_ = look_get_top_word(rs);
2312 free(rs);
2313 if (TRUEP(look_))
2314 return look_;
2315 } else {
2316 free(rs);
2317 }
2318 return skk_get_dcomp_word(skk_dic_, head_, uim_scm_f(), use_look_);
2319 }
2320 }
2321 return MAKE_STR("");
2322 }
2323
2324 static void
reorder_candidate(dic_info * skk_dic,struct skk_cand_array * ca,const char * str)2325 reorder_candidate(dic_info *skk_dic, struct skk_cand_array *ca, const char *str)
2326 {
2327 int i;
2328 int nth = 0;
2329 char *tmp;
2330 /* find index of the candidate */
2331 for (i = 0; i < ca->nr_cands; i++) {
2332 if (!strcmp(str, ca->cands[i])) {
2333 nth = i;
2334 break;
2335 }
2336 }
2337
2338 /* shift array */
2339 tmp = ca->cands[nth];
2340 if (nth) {
2341 for (i = nth; i > 0; i--)
2342 ca->cands[i] = ca->cands[i - 1];
2343 ca->cands[0] = tmp;
2344 skk_dic->cache_modified = 1;
2345 }
2346 /* */
2347 if (nth >= ca->nr_real_cands)
2348 ca->nr_real_cands++;
2349 }
2350
/*
 * Record word as a purged word in candidate nth of ca.
 *
 * With append set, the candidate is taken to be an existing
 * (skk-ignore-dic-word ...) form: nothing happens when word is already
 * listed in it, otherwise the sanitized word is added as a further
 * quoted string before the closing parenthesis.  Without append, the
 * candidate is replaced by a fresh (skk-ignore-dic-word "...") form.
 * The cache is flagged as modified whenever the candidate changes.
 * Nothing happens when sanitize_word() yields NULL.
 *
 * NOTE(review): p is never released here.  If sanitize_word() returns
 * heap memory this leaks on every call -- confirm against its
 * definition.
 */
static void push_purged_word(dic_info *skk_dic, struct skk_cand_array *ca, int nth, int append, char *word)
{
  char *cand = ca->cands[nth];
  int oldlen = strlen(cand);
  int len;
  char *p = sanitize_word(word, NULL);
  char **purged_words;
  int nr_purged, j, known;

  if (p == NULL)
    return;

  if (!append) {
    len = strlen("(skk-ignore-dic-word \"\")") + strlen(p) + 1;
    cand = uim_realloc(cand, len);
    if (cand) {
      snprintf(cand, len, "(skk-ignore-dic-word \"%s\")", p);
      ca->cands[nth] = cand;
      skk_dic->cache_modified = 1;
    }
    return;
  }

  /* check whether the word is already registered */
  purged_words = get_purged_words(cand);
  nr_purged = nr_purged_words(purged_words);
  known = 0;
  for (j = 0; j < nr_purged && !known; j++)
    known = (strcmp(purged_words[j], word) == 0);
  free_allocated_purged_words(purged_words);
  if (known)
    return;

  len = oldlen + strlen(p) + 3;
  cand = uim_realloc(cand, len + 1);
  if (cand) {
    /* drop the last character (the closing parenthesis) and re-close */
    cand[oldlen - 1] = '\0';
    strcat(cand, " \"");
    strcat(cand, p);
    strcat(cand, "\")");
    ca->cands[nth] = cand;
    skk_dic->cache_modified = 1;
  }
}
2393
/*
 * Free candidate nth of ca and close the gap by moving the following
 * candidates one place forward.  nr_cands is decremented, as is
 * nr_real_cands when the removed candidate was a real one, and the cache
 * is flagged as modified.
 */
static void remove_candidate_from_array(dic_info *skk_dic, struct skk_cand_array *ca, int nth)
{
  int following = ca->nr_cands - 1 - nth;

  free(ca->cands[nth]);
  if (following > 0)
    memmove(&ca->cands[nth], &ca->cands[nth + 1],
            sizeof(ca->cands[0]) * following);

  if (nth < ca->nr_real_cands)
    ca->nr_real_cands--;
  ca->nr_cands--;
  skk_dic->cache_modified = 1;
}
2406
2407 static void
merge_word_to_real_cand_array(struct skk_cand_array * ca,const char * word)2408 merge_word_to_real_cand_array(struct skk_cand_array *ca, const char *word)
2409 {
2410 int i, nth = -1;
2411 char *tmp;
2412
2413 push_back_candidate_to_array(ca, word);
2414 nth = ca->nr_cands - 1;
2415
2416 /* move word at the end of real cand array */
2417 tmp = ca->cands[nth];
2418 if (nth >= ca->nr_real_cands) {
2419 for (i = nth; i > ca->nr_real_cands; i--)
2420 ca->cands[i] = ca->cands[i - 1];
2421 ca->cands[ca->nr_real_cands] = tmp;
2422 ca->nr_real_cands++;
2423 }
2424 }
2425
exist_in_purged_cand(struct skk_cand_array * ca,const char * word)2426 static int exist_in_purged_cand(struct skk_cand_array *ca,
2427 const char *word)
2428 {
2429 int i, purged_cand_index;
2430 char **purged_words;
2431 int nr_purged;
2432
2433 purged_cand_index = get_purged_cand_index(ca);
2434 if (purged_cand_index == -1)
2435 return 0;
2436
2437 purged_words = get_purged_words(ca->cands[purged_cand_index]);
2438 nr_purged = nr_purged_words(purged_words);
2439
2440 for (i = 0; i < nr_purged; i++) {
2441 if (!strcmp(purged_words[i], word)) {
2442 free_allocated_purged_words(purged_words);
2443 return 1;
2444 }
2445 }
2446 free_allocated_purged_words(purged_words);
2447 return 0;
2448 }
2449
index_in_real_cands(struct skk_cand_array * ca,const char * str)2450 static int index_in_real_cands(struct skk_cand_array *ca, const char *str)
2451 {
2452 int i;
2453 for (i = 0; i < ca->nr_real_cands; i++) {
2454 if (!strcmp(ca->cands[i], str))
2455 return i;
2456 }
2457 return -1;
2458 }
2459
2460 static void
remove_purged_words_from_dst_cand_array(dic_info * skk_dic,struct skk_cand_array * src_ca,struct skk_cand_array * dst_ca,const char * purged_cand)2461 remove_purged_words_from_dst_cand_array(dic_info *skk_dic,
2462 struct skk_cand_array *src_ca,
2463 struct skk_cand_array *dst_ca, const char *purged_cand)
2464 {
2465 char **purged_words;
2466 int nr_words;
2467 int i, j;
2468
2469 purged_words = get_purged_words(purged_cand);
2470 nr_words = nr_purged_words(purged_words);
2471
2472 for (i = 0; i < nr_words; i++) {
2473 int dup = 0;
2474
2475 if (index_in_real_cands(src_ca, purged_words[i]) != -1)
2476 continue;
2477
2478 for (j = 0; j < dst_ca->nr_real_cands; j++) {
2479 if (!strcmp(purged_words[i], dst_ca->cands[j])) {
2480 dup = 1;
2481 break;
2482 }
2483 }
2484 if (dup)
2485 remove_candidate_from_array(skk_dic, dst_ca, j);
2486 }
2487 free_allocated_purged_words(purged_words);
2488 }
2489
2490 static void
merge_purged_cands(dic_info * skk_dic,struct skk_cand_array * src_ca,struct skk_cand_array * dst_ca,int src_nth,int dst_nth)2491 merge_purged_cands(dic_info *skk_dic, struct skk_cand_array *src_ca,
2492 struct skk_cand_array *dst_ca, int src_nth, int dst_nth)
2493 {
2494 char *src_cand = src_ca->cands[src_nth];
2495 char *dst_cand = dst_ca->cands[dst_nth];
2496 char **dst_purged_words, **src_purged_words;
2497 int nr_dst_purged_words, nr_src_purged_words;
2498 int i, j;
2499
2500 src_purged_words = get_purged_words(src_cand);
2501 dst_purged_words = get_purged_words(dst_cand);
2502 nr_src_purged_words = nr_purged_words(src_purged_words);
2503 nr_dst_purged_words = nr_purged_words(dst_purged_words);
2504
2505 for (i = 0; i < nr_src_purged_words; i++) {
2506 int dup = 0;
2507 for (j = 0; j < nr_dst_purged_words; j++) {
2508 if (!strcmp(src_purged_words[i], dst_purged_words[j])) {
2509 dup = 1;
2510 break;
2511 }
2512 }
2513 if (!dup) {
2514 push_purged_word(skk_dic, dst_ca, dst_nth, 1, src_purged_words[i]);
2515 remove_purged_words_from_dst_cand_array(skk_dic, src_ca, dst_ca, src_ca->cands[src_nth]);
2516 }
2517 }
2518 free_allocated_purged_words(dst_purged_words);
2519 free_allocated_purged_words(src_purged_words);
2520 }
2521
2522 static void
merge_purged_cand_to_dst_array(dic_info * skk_dic,struct skk_cand_array * src_ca,struct skk_cand_array * dst_ca,char * purged_cand)2523 merge_purged_cand_to_dst_array(dic_info *skk_dic,
2524 struct skk_cand_array *src_ca,
2525 struct skk_cand_array *dst_ca, char *purged_cand)
2526 {
2527 remove_purged_words_from_dst_cand_array(skk_dic, src_ca, dst_ca, purged_cand);
2528 merge_word_to_real_cand_array(dst_ca, purged_cand);
2529 }
2530
2531 static void
merge_word_to_dst_cand_array_with_purged_words(struct skk_cand_array * dst_ca,struct skk_cand_array * src_ca,const char * src_cand)2532 merge_word_to_dst_cand_array_with_purged_words(struct skk_cand_array *dst_ca,
2533 struct skk_cand_array *src_ca, const char *src_cand)
2534 {
2535 int i, nth;
2536 char *tmp;
2537
2538 if (exist_in_purged_cand(dst_ca, src_cand) && !exist_in_purged_cand(src_ca, src_cand))
2539 return;
2540
2541 push_back_candidate_to_array(dst_ca, src_cand);
2542 nth = dst_ca->nr_cands - 1;
2543
2544 /* move word at the end of real cand array */
2545 tmp = dst_ca->cands[nth];
2546 if (nth >= dst_ca->nr_real_cands) {
2547 for (i = nth; i > dst_ca->nr_real_cands; i--)
2548 dst_ca->cands[i] = dst_ca->cands[i - 1];
2549 dst_ca->cands[dst_ca->nr_real_cands] = tmp;
2550 dst_ca->nr_real_cands++;
2551 }
2552 }
2553
2554 static void
merge_real_candidate_array(dic_info * skk_dic,struct skk_cand_array * src_ca,struct skk_cand_array * dst_ca)2555 merge_real_candidate_array(dic_info *skk_dic,
2556 struct skk_cand_array *src_ca,
2557 struct skk_cand_array *dst_ca)
2558 {
2559 int i, j;
2560 int src_nr_real_cands = src_ca->nr_real_cands;
2561 int dst_nr_real_cands = dst_ca->nr_real_cands;
2562
2563 if (!src_ca || !dst_ca)
2564 return ;
2565
2566 for (i = 0; i < src_nr_real_cands; i++) {
2567 int dup = 0;
2568 int src_purged_cand_index = -1;
2569 int dst_purged_cand_index = -1;
2570
2571 if (is_purged_cand(src_ca->cands[i]))
2572 src_purged_cand_index = i;
2573
2574 for (j = 0; j < dst_nr_real_cands; j++) {
2575 if (dst_purged_cand_index == -1 && is_purged_cand(dst_ca->cands[j]))
2576 dst_purged_cand_index = j;
2577 if (!strcmp(src_ca->cands[i], dst_ca->cands[j]))
2578 dup = 1;
2579 }
2580
2581 if (!dup) {
2582 /* be careful! */
2583 if (src_purged_cand_index != -1 && dst_purged_cand_index != -1)
2584 merge_purged_cands(skk_dic, src_ca, dst_ca, src_purged_cand_index,
2585 dst_purged_cand_index);
2586 else if (src_purged_cand_index != -1 && dst_purged_cand_index == -1)
2587 merge_purged_cand_to_dst_array(skk_dic, src_ca, dst_ca,
2588 src_ca->cands[src_purged_cand_index]);
2589 else if (src_purged_cand_index == -1 && dst_purged_cand_index != -1)
2590 merge_word_to_dst_cand_array_with_purged_words(dst_ca, src_ca,
2591 src_ca->cands[i]);
2592 else
2593 merge_word_to_real_cand_array(dst_ca, src_ca->cands[i]);
2594 }
2595 }
2596 }
2597
/*
 * Record that the user committed the nth visible candidate.
 *
 * head_and_okuri_head_ is a pair whose CAR is the head and whose CDR is the
 * okuri head.  The candidate array is looked up (without creating it); the
 * nth candidate that is not in the ignoring-index list is located and moved
 * with reorder_candidate().  When okuri_ is given, either the okuri-specific
 * array is looked up with the create flag set, or the base array of the line
 * is reordered as well.  Finally the line is flagged NEED_SAVE |
 * USE_FOR_COMPLETION and moved to the head of the cache.
 *
 * With numeric conversion active, a candidate carrying a "#4" mark counts
 * once per entry of the sub candidate array found for the number string,
 * and the matching sub candidate is committed recursively.
 *
 * Always returns uim_scm_f().
 */
static uim_lisp
skk_commit_candidate(uim_lisp skk_dic_, uim_lisp head_and_okuri_head_,
                     uim_lisp okuri_, uim_lisp nth_, uim_lisp numeric_conv_)
{
  int nth;
  struct skk_cand_array *ca, *subca;
  char *str = NULL;
  int i, j, k = 0;
  uim_lisp numstr_;
  const char *numstr;
  int method_place = 0;
  uim_lisp numlst_ = uim_scm_null();
  int ignoring_indices[IGNORING_WORD_MAX + 1];
  dic_info *skk_dic = NULL;
  uim_lisp head_ = CAR(head_and_okuri_head_);
  uim_lisp okuri_head_ = CDR(head_and_okuri_head_);

  if (PTRP(skk_dic_))
    skk_dic = C_PTR(skk_dic_);

  if (TRUEP(numeric_conv_))
    numlst_ = skk_store_replaced_numeric_str(head_);

  nth = C_INT(nth_);

  /* numeric conversion only stays on when the head produced a number list */
  if (!NULLP(numlst_))
    ca = find_cand_array_lisp(skk_dic, head_, okuri_head_, okuri_, 0, numeric_conv_);
  else
    ca = find_cand_array_lisp(skk_dic, head_, okuri_head_, okuri_, 0, uim_scm_f());

  if (!ca) {
    /* nothing found with numeric conversion: retry without it */
    if (!NULLP(numlst_))
      return skk_commit_candidate(skk_dic_, head_and_okuri_head_, okuri_, nth_,
                                  uim_scm_f());
    return uim_scm_f();
  }

  get_ignoring_indices(ca, ignoring_indices);

  /* handle #4 method of numeric conversion */
  if (!NULLP(numlst_)) {
    /* k counts the visible candidates seen so far */
    for (i = 0; i < ca->nr_cands; i++) {
      if (match_to_discarding_index(ignoring_indices, i))
        continue;

      if (find_numeric_conv_method4_mark(ca->cands[i], &method_place)) {
        numstr_ = get_nth(method_place, numlst_);
        numstr = REFER_C_STR(numstr_);
        subca = find_cand_array(skk_dic, numstr, 0, NULL, 0);
        if (subca) {
          /* this candidate occupies one visible slot per sub candidate */
          for (j = 0; j < subca->nr_cands; j++) {
            if (k == nth) {
              str = ca->cands[i];
              /* reorder sub candidate */
              skk_commit_candidate(skk_dic_, CONS(numstr_, uim_scm_null()),
                                   uim_scm_null(), MAKE_INT(j), uim_scm_f());
              break;
            }
            k++;
          }
        }
        if (str)
          break;
      } else {
        if (k == nth) {
          str = ca->cands[i];
          break;
        }
        k++;
      }
    }
    if (!str) {
      /*
       * nth lies past the numeric candidates: retry without numeric
       * conversion, with the index reduced by the k slots counted here
       */
      if (nth >= k)
        return skk_commit_candidate(skk_dic_, head_and_okuri_head_, okuri_,
                                    MAKE_INT(nth - k), uim_scm_f());
      return uim_scm_f();
    }
  } else {
    for (i = 0; i < ca->nr_cands; i++) {
      if (match_to_discarding_index(ignoring_indices, i))
        continue;
      if (k == nth) {
        str = ca->cands[i];
        break;
      }
      k++;
    }
    if (!str)
      return uim_scm_f();
  }
  reorder_candidate(skk_dic, ca, str);

  if (okuri_ != uim_scm_null()) {
    struct skk_line *sl;
    const char *okuri;
    int found = 0;

    okuri = REFER_C_STR(okuri_);
    sl = ca->line;
    /* the scan starts at index 1, skipping the base array at index 0 */
    for (i = 1; i < sl->nr_cand_array; i++) {
      if (!strcmp(okuri, sl->cands[i].okuri)) {
        found = 1;
        break;
      }
    }
    if (!found) {
      /* same lookup as above but with the create flag set to 1 */
      if (!NULLP(numlst_))
        ca = find_cand_array_lisp(skk_dic, head_, okuri_head_, okuri_, 1, numeric_conv_);
      else
        ca = find_cand_array_lisp(skk_dic, head_, okuri_head_, okuri_, 1, uim_scm_f());
      reorder_candidate(skk_dic, ca, str);
    } else {
      /* also reorder base candidate array */
      reorder_candidate(skk_dic, &sl->cands[0], str);
    }
  }

  ca->line->state = SKK_LINE_NEED_SAVE | SKK_LINE_USE_FOR_COMPLETION;
  move_line_to_cache_head(skk_dic, ca->line);

  return uim_scm_f();
}
2720
/*
 * Mark the nth candidate of ca as purged.  nth == -1 is a no-op.
 *
 * If ca has no purged entry yet, push_purged_word() is called on slot nth
 * itself with its append flag clear.  Otherwise the word is appended to the
 * existing purged entry and the original slot is removed.
 */
static void
purge_candidate(dic_info *skk_dic, struct skk_cand_array *ca, int nth)
{
  char *word;
  int purged_idx;

  if (nth == -1)
    return;

  /* work on a private copy: the slot may be reallocated or freed below */
  word = uim_strdup(ca->cands[nth]);
  purged_idx = get_purged_cand_index(ca);

  if (purged_idx != -1) {
    /* append to the existing purged entry, then drop the word's own slot */
    push_purged_word(skk_dic, ca, purged_idx, 1, word);
    remove_candidate_from_array(skk_dic, ca, nth);
  } else {
    /* first purged word of this array */
    push_purged_word(skk_dic, ca, nth, 0, word);
  }

#if 0
  /* Disabled since we use okuri specific ignoring words */
  if (ca->okuri) {
    /* also purge the word in the base cand array */
    int index = index_in_real_cands(&ca->line->cands[0], word);
    if (index != -1)
      purge_candidate(skk_dic, &ca->line->cands[0], index);
  }
#endif
  free(word);
}
2751
/*
 * Purge the nth visible candidate of the entry identified by
 * head_and_okuri_head_ (CAR: head, CDR: okuri head) and okuri_.
 *
 * The candidate array is looked up without creating it.  Candidates in the
 * ignoring-index list are skipped while counting up to nth.  The selected
 * slot is handed to purge_candidate() only when its index is below
 * nr_real_cands.
 *
 * Returns uim_scm_f() when no candidate array is found or, in numeric
 * mode, when no candidate was selected; otherwise uim_scm_t().
 */
static uim_lisp
skk_purge_candidate(uim_lisp skk_dic_, uim_lisp head_and_okuri_head_,
                    uim_lisp okuri_, uim_lisp nth_, uim_lisp numeric_conv_)
{
  int nth = C_INT(nth_);
  struct skk_cand_array *ca, *subca;
  char *str = NULL;
  int i, j, k = 0;
  uim_lisp numstr_;
  const char *numstr;
  int method_place = 0;
  uim_lisp numlst_ = uim_scm_null();
  int ignoring_indices[IGNORING_WORD_MAX + 1];
  dic_info *skk_dic = NULL;
  uim_lisp head_ = CAR(head_and_okuri_head_);
  uim_lisp okuri_head_ = CDR(head_and_okuri_head_);

  if (PTRP(skk_dic_))
    skk_dic = C_PTR(skk_dic_);

  if (TRUEP(numeric_conv_))
    numlst_ = skk_store_replaced_numeric_str(head_);

  /* numeric conversion only stays on when the head produced a number list */
  if (!NULLP(numlst_))
    ca = find_cand_array_lisp(skk_dic, head_, okuri_head_, okuri_, 0, numeric_conv_);
  else
    ca = find_cand_array_lisp(skk_dic, head_, okuri_head_, okuri_, 0, uim_scm_f());

  if (!ca) {
    /* nothing found with numeric conversion: retry without it */
    if (!NULLP(numlst_))
      return skk_purge_candidate(skk_dic_, head_and_okuri_head_, okuri_, nth_,
                                 uim_scm_f());
    return uim_scm_f(); /* shouldn't happen */
  }

  get_ignoring_indices(ca, ignoring_indices);

  /* handle #4 method of numeric conversion */
  if (!NULLP(numlst_)) {
    /* k counts the visible candidates seen so far; str doubles as "found" */
    for (i = 0; i < ca->nr_cands; i++) {
      if (match_to_discarding_index(ignoring_indices, i))
        continue;

      if (find_numeric_conv_method4_mark(ca->cands[i], &method_place)) {
        numstr_ = get_nth(method_place, numlst_);
        numstr = REFER_C_STR(numstr_);
        subca = find_cand_array(skk_dic, numstr, 0, NULL, 0);
        if (subca) {
          /* this candidate occupies one visible slot per sub candidate */
          for (j = 0; j < subca->nr_cands; j++) {
            if (k == nth) {
              str = ca->cands[i];
              /*
               * don't purge word in sub candidate array
               * skk_purge_candidate(skk_dic_, numstr_, uim_scm_null(), uim_scm_null(), MAKE_INT(j), uim_scm_null());
               */
              break;
            }
            k++;
          }
        }
        if (str)
          break;
      } else {
        if (k == nth) {
          str = ca->cands[i];
          break;
        }
        k++;
      }
    }
    if (!str) {
      /*
       * nth lies past the numeric candidates: retry without numeric
       * conversion; the result of the retry is not propagated
       */
      if (nth >= k)
        skk_purge_candidate(skk_dic_, head_and_okuri_head_, okuri_,
                            MAKE_INT(nth - k), uim_scm_f());
      return uim_scm_f();
    }
  } else {
    /* leaves i at the selected slot, or at nr_cands when nth is too large */
    for (i = 0; i < ca->nr_cands; i++) {
      if (match_to_discarding_index(ignoring_indices, i))
        continue;
      if (k == nth)
        break;
      k++;
    }
  }
  /* slots at or beyond nr_real_cands are never purged */
  if (i < ca->nr_real_cands)
    purge_candidate(skk_dic, ca, i);

  return uim_scm_t();
}
2842
2843 static void
learn_word_to_cand_array(dic_info * skk_dic,struct skk_cand_array * ca,const char * word)2844 learn_word_to_cand_array(dic_info *skk_dic, struct skk_cand_array *ca, const char *word)
2845 {
2846 int i, nth = -1;
2847 for (i = 0; i < ca->nr_cands; i++) {
2848 if (!strcmp(word, ca->cands[i])) {
2849 nth = i;
2850 break;
2851 }
2852 }
2853 if (nth == -1)
2854 push_back_candidate_to_array(ca, word);
2855
2856 reorder_candidate(skk_dic, ca, word);
2857 ca->line->state = SKK_LINE_NEED_SAVE | SKK_LINE_USE_FOR_COMPLETION;
2858 }
2859
/*
 * Build an escaped copy of word.
 *
 * The following characters are replaced:
 *   '/'  -> \057      ';'  -> \073
 *   '\n' -> \n        '\r' -> \r      (backslash + letter)
 *   '\\' -> \\        '"'  -> \"
 * Everything else, including '[' and ']', is copied unchanged.
 *
 * When prefix is non-NULL the result is prefix + escaped word + the two
 * characters `")`; when it is NULL only the escaped word is returned.
 *
 * The result is heap allocated; the caller frees it.
 *
 * The buffer is sized once for the worst case (every byte expanding to a
 * four-byte escape) and filled in a single pass, instead of re-measuring
 * and re-allocating the string for every input character.
 */
static char *
quote_word(const char *word, const char *prefix)
{
  size_t prefix_len = prefix ? strlen(prefix) : 0;
  size_t cap;
  char *str, *out;
  const char *p;

  cap = prefix_len + 4 * strlen(word) + strlen("\")") + 1;
  str = uim_strdup(prefix ? prefix : "");
  str = uim_realloc(str, cap);
  out = str + prefix_len;

  for (p = word; *p; p++) {
    const char *esc;

    switch (*p) {
    case '/':
      esc = "\\057";
      break;
    case '\n':
      esc = "\\n";
      break;
    case '\r':
      esc = "\\r";
      break;
    case '\\':
      esc = "\\\\";
      break;
    case ';':
      esc = "\\073";
      break;
    case '"':
      esc = "\\\"";
      break;
    default:
      /* '[' and ']' land here too: they are emitted as-is */
      esc = NULL;
      break;
    }

    if (esc) {
      size_t n = strlen(esc);

      memcpy(out, esc, n);
      out += n;
    } else {
      *out++ = *p;
    }
  }

  if (prefix) {
    memcpy(out, "\")", 2);
    out += 2;
  }
  *out = '\0';

  return str;
}
2923
/*
 * Prepare str for storing as a candidate.
 *
 * Returns NULL for a NULL or empty string and for a string made only of
 * spaces.  As soon as one of  / [ ] \n \r \ ; "  is met, the whole string
 * is handed to quote_word(str, prefix) and that result is returned.
 * Otherwise a plain duplicate of str is returned.  The caller frees the
 * result.
 */
static char *
sanitize_word(const char *str, const char *prefix)
{
  const char *p;
  int saw_non_space = 0;

  if (str == NULL || str[0] == '\0')
    return NULL;

  for (p = str; *p != '\0'; p++) {
    /* *p is never NUL here, so strchr cannot match the set's terminator */
    if (strchr("/[]\n\r\\;\"", *p) != NULL)
      return quote_word(str, prefix);
    if (*p != ' ')
      saw_non_space = 1;
  }

  return saw_non_space ? uim_strdup(str) : NULL;
}
2956
2957 static uim_lisp
skk_learn_word(uim_lisp skk_dic_,uim_lisp head_and_okuri_head_,uim_lisp okuri_,uim_lisp word_,uim_lisp numeric_conv_)2958 skk_learn_word(uim_lisp skk_dic_, uim_lisp head_and_okuri_head_,
2959 uim_lisp okuri_, uim_lisp word_, uim_lisp numeric_conv_)
2960 {
2961 struct skk_cand_array *ca;
2962 char *word;
2963 const char *tmp;
2964 dic_info *skk_dic = NULL;
2965 uim_lisp head_ = CAR(head_and_okuri_head_);
2966 uim_lisp okuri_head_ = CDR(head_and_okuri_head_);
2967
2968 if (PTRP(skk_dic_))
2969 skk_dic = C_PTR(skk_dic_);
2970
2971 tmp = REFER_C_STR(word_);
2972 word = sanitize_word(tmp, "(concat \"");
2973 if (!word)
2974 return uim_scm_f();
2975
2976 ca = find_cand_array_lisp(skk_dic, head_, okuri_head_, okuri_, 1, numeric_conv_);
2977 if (ca)
2978 learn_word_to_cand_array(skk_dic, ca, word);
2979
2980 tmp = REFER_C_STR(okuri_);
2981 if (strlen(tmp)) {
2982 ca = find_cand_array_lisp(skk_dic, head_, okuri_head_, uim_scm_null(), 1, numeric_conv_);
2983 if (ca)
2984 learn_word_to_cand_array(skk_dic, ca, word);
2985 }
2986 free(word);
2987 return uim_scm_f();
2988 }
2989
2990 static void
reverse_cache(dic_info * di)2991 reverse_cache(dic_info *di)
2992 {
2993 struct skk_line *sl, *prev, *next;
2994
2995 prev= NULL;
2996 sl = di->head.next;
2997 while (sl) {
2998 next = sl->next;
2999 sl->next = prev;
3000 prev = sl;
3001 sl = next;
3002 }
3003 di->head.next = prev;
3004 }
3005
3006 static void
parse_dic_line(dic_info * di,char * line,int is_personal)3007 parse_dic_line(dic_info *di, char *line, int is_personal)
3008 {
3009 char *buf, *sep;
3010 struct skk_line *sl;
3011 int i;
3012
3013 buf = uim_strdup(line);
3014 sep = strchr(buf, ' ');
3015
3016 if (!sep || (sep == buf)) {
3017 free(buf);
3018 return;
3019 }
3020
3021 *sep = '\0';
3022 if ((!skk_isascii(buf[0]) || buf[0] == '>') && skk_islower(sep[-1])) {
3023 /* okuri-ari entry */
3024 char okuri_head = sep[-1];
3025 sep[-1] = '\0';
3026 sl = compose_line(di, buf, okuri_head, line);
3027 } else {
3028 sl = compose_line(di, buf, 0, line);
3029 }
3030 if (is_personal) {
3031 sl->state = SKK_LINE_NEED_SAVE | SKK_LINE_USE_FOR_COMPLETION;
3032 /* set nr_real_cands for the candidate array from personal dictionaly */
3033 for (i = 0; i < sl->nr_cand_array; i++)
3034 sl->cands[i].nr_real_cands = sl->cands[i].nr_cands;
3035 } else {
3036 sl->state = SKK_LINE_USE_FOR_COMPLETION;
3037 }
3038 add_line_to_cache_head(di, sl);
3039 free(buf);
3040 }
3041
3042 static void
write_out_array(FILE * fp,struct skk_cand_array * ca)3043 write_out_array(FILE *fp, struct skk_cand_array *ca)
3044 {
3045 int i;
3046 if (ca->okuri) {
3047 fprintf(fp, "[%s/", ca->okuri);
3048 for (i = 0; i < ca->nr_real_cands; i++)
3049 fprintf(fp, "%s/", ca->cands[i]);
3050 fprintf(fp, "]/");
3051 } else {
3052 for (i = 0; i < ca->nr_real_cands; i++)
3053 fprintf(fp, "%s/", ca->cands[i]);
3054 }
3055 }
3056
3057 static void
write_out_line(FILE * fp,struct skk_line * sl)3058 write_out_line(FILE *fp, struct skk_line *sl)
3059 {
3060 struct skk_cand_array *ca;
3061 int i;
3062
3063 fprintf(fp, "%s", sl->head);
3064 if (sl->okuri_head) {
3065 fprintf(fp, "%c /", sl->okuri_head);
3066 } else {
3067 fprintf(fp, " /");
3068 }
3069 for (i = 0; i < sl->nr_cand_array; i++) {
3070 ca = &sl->cands[i];
3071 write_out_array(fp, ca);
3072 }
3073 fprintf(fp, "\n");
3074 }
3075
/*
 * Open (creating if needed, mode 0600) the file "<name>.lock" and take a
 * whole-file fcntl lock of the given type (e.g. F_RDLCK / F_WRLCK) on it,
 * waiting until the lock is granted.
 *
 * Returns the descriptor holding the lock, or -1 if the file could not be
 * opened or the lock request failed.
 */
static int
open_lock(const char *name, int type)
{
  char lock_fn[MAXPATHLEN];
  struct flock fl;
  int fd;

  snprintf(lock_fn, sizeof(lock_fn), "%s.lock", name);

  fd = open(lock_fn, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
  if (fd != -1) {
    /* l_start == 0 with l_len == 0 covers the whole file */
    fl.l_type = type;
    fl.l_whence = SEEK_SET;
    fl.l_start = 0;
    fl.l_len = 0;

    if (fcntl(fd, F_SETLKW, &fl) == -1) {
      close(fd);
      return -1;
    }
  }

  return fd;
}
3100
/*
 * Release the whole-file lock held on fd and close the descriptor.
 * A negative fd is ignored.
 */
static void
close_lock(int fd)
{
  struct flock fl;

  if (fd >= 0) {
    fl.l_type = F_UNLCK;
    fl.l_whence = SEEK_SET;
    fl.l_start = 0;
    fl.l_len = 0;

    fcntl(fd, F_SETLKW, &fl);
    close(fd);
  }
}
3117
3118 static int
read_dictionary_file(dic_info * di,const char * fn,int is_personal)3119 read_dictionary_file(dic_info *di, const char *fn, int is_personal)
3120 {
3121 struct stat st;
3122 FILE *fp;
3123 char buf[4096]; /* XXX */
3124 int err_flag = 0;
3125 int lock_fd;
3126
3127 if (!di)
3128 return 0;
3129
3130 lock_fd = open_lock(fn, F_RDLCK);
3131
3132 if (stat(fn, &st) == -1) {
3133 close_lock(lock_fd);
3134 return 0;
3135 }
3136
3137 fp = fopen(fn, "r");
3138 if (!fp) {
3139 close_lock(lock_fd);
3140 return 0;
3141 }
3142
3143 di->personal_dic_timestamp = st.st_mtime;
3144
3145 while (fgets(buf, 4096, fp)) { /* XXX */
3146 int len = strlen(buf);
3147 if (buf[len - 1] == '\n') {
3148 if (err_flag == 0) {
3149 if (buf[0] != ';') {
3150 buf[len - 1] = '\0';
3151 parse_dic_line(di, buf, is_personal);
3152 }
3153 } else {
3154 /* erroneous line ends here */
3155 err_flag = 0;
3156 }
3157 } else {
3158 err_flag = 1;
3159 }
3160 }
3161 fclose(fp);
3162 close_lock(lock_fd);
3163 reverse_cache(di);
3164 return 1;
3165 }
3166
3167 static uim_lisp
skk_read_personal_dictionary(uim_lisp skk_dic_,uim_lisp fn_)3168 skk_read_personal_dictionary(uim_lisp skk_dic_, uim_lisp fn_)
3169 {
3170 const char *fn;
3171 struct stat st;
3172 uim_lisp ret;
3173 dic_info *skk_dic = NULL;
3174
3175 if (PTRP(skk_dic_))
3176 skk_dic = C_PTR(skk_dic_);
3177
3178 fn = REFER_C_STR(fn_);
3179 ret = (stat(fn, &st) != -1) ? uim_scm_t() : uim_scm_f();
3180
3181 update_personal_dictionary_cache_with_file(skk_dic, fn, 1);
3182 #if USE_SKK_JISYO_S_BUF
3183 update_personal_dictionary_cache_with_file(skk_dic, SKK_JISYO_S, 0);
3184 #endif
3185
3186 return ret;
3187 }
3188
push_back_candidate_array_to_sl(struct skk_line * sl,struct skk_cand_array * src_ca)3189 static void push_back_candidate_array_to_sl(struct skk_line *sl,
3190 struct skk_cand_array *src_ca)
3191 {
3192 int i;
3193 struct skk_cand_array *ca;
3194
3195 sl->nr_cand_array++;
3196 sl->cands = uim_realloc(sl->cands,
3197 sizeof(struct skk_cand_array) * sl->nr_cand_array);
3198 ca = &sl->cands[sl->nr_cand_array - 1];
3199 ca->is_used = src_ca->is_used;
3200 ca->nr_cands = src_ca->nr_cands;
3201 ca->cands = uim_malloc(sizeof(char *) * src_ca->nr_cands);
3202 for (i = 0; i < ca->nr_cands; i++)
3203 ca->cands[i] = uim_strdup(src_ca->cands[i]);
3204
3205 ca->nr_real_cands = src_ca->nr_real_cands;
3206 ca->okuri = uim_strdup(src_ca->okuri);
3207 ca->line = sl;
3208 }
3209
/*
 * Merge the candidate arrays of src_sl into dst_sl.
 *
 * The arrays at index 0 are always merged.  Every further source array is
 * merged into each destination array (index >= 1) with an equal okuri
 * string, or copied to the end of dst_sl when no such array exists.
 * The state bits of src_sl are OR-ed into dst_sl.  Nothing happens when
 * either line is NULL.
 */
static void
compare_and_merge_skk_line(dic_info *skk_dic,
                           struct skk_line *dst_sl,
                           struct skk_line *src_sl)
{
  int s, d;

  if (!dst_sl || !src_sl)
    return;

  /*
   * check all candidate array since purged words may exist.
   */
  merge_real_candidate_array(skk_dic, &src_sl->cands[0], &dst_sl->cands[0]);

  for (s = 1; s < src_sl->nr_cand_array; s++) {
    struct skk_cand_array *src_ca = &src_sl->cands[s];
    int matched = 0;

    for (d = 1; d < dst_sl->nr_cand_array; d++) {
      struct skk_cand_array *dst_ca = &dst_sl->cands[d];

      if (strcmp(src_ca->okuri, dst_ca->okuri) != 0)
        continue;

      matched = 1;
      merge_real_candidate_array(skk_dic, src_ca, dst_ca);
    }

    if (!matched)
      push_back_candidate_array_to_sl(dst_sl, src_ca);
  }

  dst_sl->state |= src_sl->state;
}
3246
3247 /* for merge sort */
3248 static int
compare_entry(struct skk_line * p,struct skk_line * q)3249 compare_entry(struct skk_line *p, struct skk_line *q)
3250 {
3251 int ret;
3252 ret = strcmp(p->head, q->head);
3253
3254 if (ret != 0)
3255 return ret;
3256 else
3257 return p->okuri_head - q->okuri_head;
3258 }
3259
3260 /*
3261 * Retern lines with differential "midashi-go" between two personal
3262 * dictionaly caches. Also merge candidate arrays for line with same
3263 * "midashi-go". p and q are needed to be sorted.
3264 */
3265 static struct skk_line *
cache_line_diffs(dic_info * skk_dic,struct skk_line * p,struct skk_line * q,int * len)3266 cache_line_diffs(dic_info *skk_dic, struct skk_line *p, struct skk_line *q, int *len)
3267 {
3268 struct skk_line *r, *s, head;
3269 int cmp;
3270
3271 for (r = &head; p && q; ) {
3272 cmp = compare_entry(p, q);
3273 if (cmp < 0) {
3274 p = p->next;
3275 } else if (cmp > 0) {
3276 s = copy_skk_line(q);
3277 r->next = s;
3278 r = s;
3279 q = q->next;
3280 (*len)++;
3281 } else {
3282 compare_and_merge_skk_line(skk_dic, p, q);
3283 p = p->next;
3284 q = q->next;
3285 }
3286 }
3287 while (q) {
3288 s = copy_skk_line(q);
3289 r->next = s;
3290 r = s;
3291 q = q->next;
3292 (*len)++;
3293 }
3294 r->next = NULL;
3295 return head.next;
3296 }
3297
3298 /* for merge sort */
3299 static struct skk_line *
lmerge(struct skk_line * p,struct skk_line * q)3300 lmerge(struct skk_line *p, struct skk_line *q)
3301 {
3302 struct skk_line *r, head;
3303
3304 for (r = &head; p && q; ) {
3305 if (compare_entry(p, q) < 0) {
3306 r->next = p;
3307 r = p;
3308 p = p->next;
3309 } else {
3310 r->next = q;
3311 r = q;
3312 q = q->next;
3313 }
3314 }
3315 r->next = (p ? p : q);
3316 return head.next;
3317 }
3318
3319 /* merge sort */
3320 static struct skk_line *
lsort(struct skk_line * p)3321 lsort(struct skk_line *p)
3322 {
3323 struct skk_line *q, *r;
3324
3325 if (p) {
3326 q = p;
3327 for (r = q->next; r && (r = r->next) != NULL; r = r->next)
3328 q = q->next;
3329 r = q->next;
3330 q->next = NULL;
3331 if (r)
3332 p = lmerge(lsort(r), lsort(p));
3333 }
3334 return p;
3335 }
3336
3337 static void
update_personal_dictionary_cache_with_file(dic_info * skk_dic,const char * fn,int is_personal)3338 update_personal_dictionary_cache_with_file(dic_info *skk_dic, const char *fn,
3339 int is_personal)
3340 {
3341 dic_info *di;
3342 struct skk_line *sl, *tmp, *diff, **cache_array;
3343 int i, diff_len = 0;
3344
3345 di = (dic_info *)uim_malloc(sizeof(dic_info));
3346 di->cache_len = 0;
3347 di->head.next = NULL;
3348
3349 if (!read_dictionary_file(di, fn, is_personal)) {
3350 free(di);
3351 return;
3352 }
3353
3354 /* If no cache is available, just use new one. */
3355 if (!skk_dic->head.next) {
3356 skk_dic->head.next = di->head.next;
3357 skk_dic->cache_len = di->cache_len;
3358 skk_dic->cache_modified = di->cache_modified;
3359 skk_dic->personal_dic_timestamp = di->personal_dic_timestamp;
3360 free(di);
3361 return;
3362 }
3363
3364 /* keep original sequence of cache */
3365 cache_array = (struct skk_line **)uim_malloc(sizeof(struct skk_line *)
3366 * skk_dic->cache_len);
3367
3368 i = 0;
3369 sl = skk_dic->head.next;
3370 while (sl) {
3371 cache_array[i] = sl;
3372 sl = sl->next;
3373 i++;
3374 }
3375
3376 /* get differential lines and merge candidate */
3377 di->head.next = lsort(di->head.next);
3378 skk_dic->head.next = lsort(skk_dic->head.next);
3379 diff = cache_line_diffs(skk_dic, skk_dic->head.next, di->head.next, &diff_len);
3380
3381 /* revert sequence of the cache */
3382 if (skk_dic->cache_len) {
3383 sl = skk_dic->head.next = cache_array[0];
3384 for (i = 0; i < skk_dic->cache_len - 1; i++) {
3385 sl->next = cache_array[i + 1];
3386 sl = sl->next;
3387 }
3388 sl->next = NULL;
3389 }
3390
3391 if (is_personal) {
3392 /* prepend differential lines at the top of the cache */
3393 if (diff != NULL) {
3394 sl = diff;
3395 while (sl->next) {
3396 sl = sl->next;
3397 }
3398 sl->next = skk_dic->head.next;
3399 skk_dic->head.next = diff;
3400 skk_dic->cache_len += diff_len;
3401 }
3402 } else {
3403 /* append differential lines at the bottom of the cache */
3404 if (skk_dic->head.next)
3405 sl->next = diff;
3406 else
3407 skk_dic->head.next = diff;
3408 skk_dic->cache_len += diff_len;
3409 }
3410
3411 skk_dic->cache_modified = 1;
3412
3413 sl = di->head.next;
3414 while (sl) {
3415 tmp = sl;
3416 sl = sl->next;
3417 free_skk_line(tmp);
3418 }
3419 free(di);
3420 free(cache_array);
3421 }
3422
3423 static uim_lisp
skk_save_personal_dictionary(uim_lisp skk_dic_,uim_lisp fn_)3424 skk_save_personal_dictionary(uim_lisp skk_dic_, uim_lisp fn_)
3425 {
3426 FILE *fp;
3427 const char *fn = REFER_C_STR(fn_);
3428 char tmp_fn[MAXPATHLEN];
3429 struct skk_line *sl;
3430 struct stat st;
3431 int lock_fd = -1;
3432 mode_t umask_val;
3433 dic_info *skk_dic = NULL;
3434
3435 if (PTRP(skk_dic_))
3436 skk_dic = C_PTR(skk_dic_);
3437
3438 if (!skk_dic || skk_dic->cache_modified == 0)
3439 return uim_scm_f();
3440
3441 if (fn) {
3442 if (stat(fn, &st) != -1) {
3443 if (st.st_mtime != skk_dic->personal_dic_timestamp)
3444 update_personal_dictionary_cache_with_file(skk_dic, fn, 1);
3445 }
3446
3447 lock_fd = open_lock(fn, F_WRLCK);
3448
3449 snprintf(tmp_fn, sizeof(tmp_fn), "%s.tmp", fn);
3450 umask_val = umask(S_IRGRP | S_IROTH | S_IWGRP | S_IWOTH);
3451 fp = fopen(tmp_fn, "w");
3452 umask(umask_val);
3453 if (!fp)
3454 goto error;
3455
3456 } else {
3457 fp = stdout;
3458 }
3459
3460 for (sl = skk_dic->head.next; sl; sl = sl->next) {
3461 if (sl->state & SKK_LINE_NEED_SAVE)
3462 write_out_line(fp, sl);
3463 }
3464
3465 if (fflush(fp) != 0)
3466 goto error;
3467
3468 if (fsync(fileno(fp)) != 0)
3469 goto error;
3470
3471 if (fclose(fp) != 0)
3472 goto error;
3473
3474 if (rename(tmp_fn, fn) != 0)
3475 goto error;
3476
3477 if (stat(fn, &st) != -1) {
3478 skk_dic->personal_dic_timestamp = st.st_mtime;
3479 skk_dic->cache_modified = 0;
3480 }
3481
3482 error:
3483 close_lock(lock_fd);
3484 return uim_scm_f();
3485 }
3486
3487 static uim_lisp
skk_get_annotation(uim_lisp str_)3488 skk_get_annotation(uim_lisp str_)
3489 {
3490 const char *str, *sep;
3491 uim_lisp res;
3492
3493 if (str_ == uim_scm_null())
3494 return uim_scm_null();
3495
3496 str = REFER_C_STR(str_);
3497 sep = strrchr(str, ';');
3498 if (sep && (*(++sep) != '\0')) {
3499 res = MAKE_STR(sep);
3500 } else {
3501 res = MAKE_STR("");
3502 }
3503 return res;
3504 }
3505
3506 static uim_lisp
skk_remove_annotation(uim_lisp str_)3507 skk_remove_annotation(uim_lisp str_)
3508 {
3509 char *str, *sep;
3510
3511 if (str_ == uim_scm_null())
3512 return uim_scm_null();
3513
3514 str = C_STR(str_);
3515 sep = strrchr(str, ';');
3516 if (sep && (*(sep + 1) != '\0')) {
3517 *sep = '\0';
3518 }
3519 return MAKE_STR_DIRECTLY(str);
3520 }
3521
/*
 * Evaluate a candidate of the form  <text>(concat "<quoted>")  .
 *
 * The quoted part (everything between the opening  (concat "  and the
 * two characters before/at the last ')') is passed to expand_str(); the
 * result is <text> followed by the expanded string, in a newly allocated
 * buffer that the caller frees.
 *
 * Returns NULL when cand contains no  (concat "  , has no closing  ")  ,
 * is too short to hold even an empty quoted string, mentions make-string,
 * or when expand_str() fails.
 */
static char *
eval_candidate_with_concat(const char *cand)
{
  char *p, *q, *str;
  char *expanded_str;
  size_t len;
  const size_t wrapper_len = strlen("(concat \"\")");

  if ((p = strstr(cand, "(concat \"")) == NULL)
    return NULL;

  /* check close paren */
  q = strrchr(p, ')');
  if (!q || (strstr(p, "\")") == NULL))
    return NULL;

  /*
   * The shortest well-formed expression is (concat "").  Anything shorter,
   * e.g. (concat ")  where the opening quote doubles as the closing one,
   * would make the length computation below wrap around.
   */
  if ((size_t)(q - p + 1) < wrapper_len)
    return NULL;

  /* ignore make-string */
  if (strstr(p, "make-string"))
    return NULL;

  /* get quoted str */
  len = (q - p + 1) - wrapper_len;
  str = uim_malloc(len + 1);
  strlcpy(str, p + strlen("(concat \""), len + 1);

  expanded_str = expand_str(str);
  if (!expanded_str) {
    free(str);
    return NULL;
  }

  /* get evaluated candidate */
  len = p - cand + strlen(expanded_str);
  if (len > strlen(str))
    str = uim_realloc(str, len + 1);

  if (p != cand) {
    /* keep the text that precedes the concat expression */
    strlcpy(str, cand, p - cand + 1);
    strcat(str, expanded_str);
  } else {
    strcpy(str, expanded_str);
  }

  free(expanded_str);
  return str;
}
3567
3568 static uim_lisp
skk_eval_candidate(uim_lisp str_)3569 skk_eval_candidate(uim_lisp str_)
3570 {
3571 const char *cand;
3572 char *str;
3573
3574 if (str_ == uim_scm_null())
3575 return uim_scm_null();
3576
3577 cand = REFER_C_STR(str_);
3578
3579 /* eval concat only for now */
3580 str = eval_candidate_with_concat(cand);
3581 if (!str)
3582 return str_;
3583
3584 return MAKE_STR_DIRECTLY(str);
3585 }
3586
3587 /* only for siod */
3588 static uim_lisp
skk_substring(uim_lisp str_,uim_lisp start_,uim_lisp end_)3589 skk_substring(uim_lisp str_, uim_lisp start_, uim_lisp end_)
3590 {
3591 const char *str;
3592 char *s;
3593 int start;
3594 int end;
3595 int len;
3596 int i, j = 0;
3597
3598 str = REFER_C_STR(str_);
3599 start = C_INT(start_);
3600 end = C_INT(end_);
3601
3602 if (!str || start < 0 || start > end)
3603 return MAKE_STR("");
3604
3605 len = strlen(str);
3606
3607 if (end > len)
3608 return MAKE_STR("");
3609
3610 s = uim_malloc(end - start + 1);
3611
3612 for (i = start; i < end; i++) {
3613 s[j] = str[i];
3614 j++;
3615 }
3616 s[j] = '\0';
3617 return MAKE_STR_DIRECTLY(s);
3618 }
3619
3620 static uim_lisp
skk_look_open(uim_lisp fn_)3621 skk_look_open(uim_lisp fn_)
3622 {
3623 const char *fn = REFER_C_STR(fn_);
3624
3625 if (use_look == 1 && skk_look_ctx)
3626 uim_look_finish(skk_look_ctx);
3627
3628 if ((skk_look_ctx = uim_look_init()) == NULL) {
3629 use_look = 0;
3630 uim_fatal_error("uim_look_init() failed");
3631 return uim_scm_f();
3632 }
3633
3634 if (!uim_look_open_dict(fn, skk_look_ctx)) {
3635 uim_look_finish(skk_look_ctx);
3636 skk_look_ctx = NULL;
3637 use_look = 0;
3638 return uim_scm_f();
3639 }
3640
3641 use_look = 1;
3642 return uim_scm_t();
3643 }
3644
3645 static uim_lisp
skk_look_close()3646 skk_look_close()
3647 {
3648 if (use_look && skk_look_ctx) {
3649 uim_look_finish(skk_look_ctx);
3650 skk_look_ctx = NULL;
3651 use_look = 0;
3652 }
3653
3654 return uim_scm_f();
3655 }
3656
3657 static uim_lisp
look_get_top_word(const char * str)3658 look_get_top_word(const char *str)
3659 {
3660 char buf[512], *dict_str;
3661 int i = 0;
3662 size_t len;
3663 uim_lisp ret_ = uim_scm_f();
3664
3665 while (str[i] != '\0') {
3666 if (!skk_isalpha(str[i]))
3667 return ret_;
3668 i++;
3669 }
3670
3671 if (!use_look)
3672 return ret_;
3673
3674 dict_str = uim_strdup(str);
3675
3676 uim_look_reset(skk_look_ctx);
3677 if (uim_look(dict_str, skk_look_ctx) != 0) {
3678 len = strlen(str);
3679 uim_look_set(skk_look_ctx);
3680 while (uim_look_get(dict_str, buf, sizeof(buf), skk_look_ctx) != 0) {
3681 /* don't use the word itself */
3682 if (strcasecmp(buf, dict_str) != 0) {
3683 /* overwrite upper and lower case */
3684 if (len < strlen(buf))
3685 memcpy(buf, str, len);
3686 ret_ = MAKE_STR(buf);
3687 break;
3688 }
3689 }
3690 }
3691 free(dict_str);
3692 return ret_;
3693 }
3694
3695 static void
look_get_comp(struct skk_comp_array * ca,const char * str)3696 look_get_comp(struct skk_comp_array *ca, const char *str)
3697 {
3698 char buf[512], *dict_str;
3699 int i = 0, nr_pre;
3700 int *matched;
3701 size_t len;
3702
3703 while (str[i] != '\0') {
3704 if (!skk_isalpha(str[i]))
3705 return;
3706 i++;
3707 }
3708
3709 if (!use_look)
3710 return ;
3711
3712 dict_str = uim_strdup(str);
3713
3714 uim_look_reset(skk_look_ctx);
3715 if (uim_look(dict_str, skk_look_ctx) == 0)
3716 return;
3717
3718 nr_pre = ca->nr_comps;
3719 matched = uim_malloc(sizeof(int) * nr_pre);
3720 for (i = 0; i < nr_pre; i++)
3721 matched[i] = 0;
3722
3723 uim_look_set(skk_look_ctx);
3724 len = strlen(str);
3725 while (uim_look_get(dict_str, buf, sizeof(buf), skk_look_ctx) != 0) {
3726 int match = 0;
3727
3728 /* don't use the word itself */
3729 if (strcasecmp(buf, dict_str) == 0)
3730 continue;
3731
3732 /* overwrite upper and lower case */
3733 if (len < strlen(buf))
3734 memcpy(buf, str, len);
3735
3736 /* skip words already in the cache */
3737 for (i = 0; i < nr_pre; i++) {
3738 if (matched[i])
3739 continue;
3740 if (!strcasecmp(ca->comps[i], buf)) {
3741 matched[i] = 1;
3742 match = 1;
3743 break;
3744 }
3745 }
3746 if (!match) {
3747 ca->nr_comps++;
3748 ca->comps = uim_realloc(ca->comps, sizeof(char *) * ca->nr_comps);
3749 ca->comps[ca->nr_comps - 1] = uim_strdup(buf);
3750 }
3751 }
3752
3753 free(matched);
3754 free(dict_str);
3755 }
3756
3757 void
uim_plugin_instance_init(void)3758 uim_plugin_instance_init(void)
3759 {
3760 uim_scm_init_proc5("skk-lib-dic-open", skk_dic_open);
3761 uim_scm_init_proc1("skk-lib-free-dic", skk_free_dic);
3762 uim_scm_init_proc2("skk-lib-read-personal-dictionary", skk_read_personal_dictionary);
3763 uim_scm_init_proc2("skk-lib-save-personal-dictionary", skk_save_personal_dictionary);
3764 uim_scm_init_proc5("skk-lib-get-entry", skk_get_entry);
3765 uim_scm_init_proc1("skk-lib-store-replaced-numstr", skk_store_replaced_numeric_str);
3766 uim_scm_init_proc2("skk-lib-merge-replaced-numstr", skk_merge_replaced_numeric_str);
3767 uim_scm_init_proc1("skk-lib-replace-numeric", skk_replace_numeric);
3768 uim_scm_init_proc5("skk-lib-get-nth-candidate", skk_get_nth_candidate);
3769 uim_scm_init_proc5("skk-lib-get-nr-candidates", skk_get_nr_candidates);
3770 uim_scm_init_proc5("skk-lib-commit-candidate", skk_commit_candidate);
3771 uim_scm_init_proc5("skk-lib-purge-candidate", skk_purge_candidate);
3772 uim_scm_init_proc5("skk-lib-learn-word", skk_learn_word);
3773 uim_scm_init_proc1("skk-lib-get-annotation", skk_get_annotation);
3774 uim_scm_init_proc1("skk-lib-remove-annotation", skk_remove_annotation);
3775 uim_scm_init_proc4("skk-lib-get-completion", skk_get_completion);
3776 uim_scm_init_proc5("skk-lib-get-nth-completion", skk_get_nth_completion);
3777 uim_scm_init_proc4("skk-lib-get-nr-completions", skk_get_nr_completions);
3778 uim_scm_init_proc2("skk-lib-clear-completions", skk_clear_completions);
3779 uim_scm_init_proc4("skk-lib-get-dcomp-word", skk_get_dcomp_word);
3780 uim_scm_init_proc1("skk-lib-eval-candidate", skk_eval_candidate);
3781 uim_scm_init_proc3("skk-lib-substring", skk_substring);
3782 uim_scm_init_proc1("skk-lib-look-open", skk_look_open);
3783 uim_scm_init_proc0("skk-lib-look-close", skk_look_close);
3784 }
3785
/*
 * Plugin exit hook.  Intentionally empty: this function releases nothing.
 */
void
uim_plugin_instance_quit(void)
{
  /* nothing to do */
}
3790
3791 /* skkserv related */
/*
 * Connect to the skkserv at hostname:portnum over TCP.
 *
 * family restricts the address family passed to getaddrinfo(); only
 * AF_INET and AF_INET6 results are tried, in the order returned.
 *
 * On success the connected socket is stored in skkservsock, buffered
 * read/write streams over it in rserv/wserv, and SKK_SERV_CONNECTED is
 * returned, OR-ed with SKK_SERV_TRY_COMPLETION when the Scheme variable
 * skk-skkserv-enable-completion? is true.  On failure the user is
 * notified, the globals are left untouched and 0 is returned.
 */
static int
open_skkserv(const char *hostname, int portnum, int family)
{
  int sock = -1;
  struct addrinfo hints, *aitop, *ai;
  char port[BUFSIZ];
  int error;
  int enable_completion;
  FILE *rd, *wr;

  (void)snprintf(port, sizeof(port), "%d", portnum);

  memset(&hints, 0, sizeof(hints));
  hints.ai_family = family;
  /* NOTE(review): AI_PASSIVE is meant for sockets that will be bound;
   * kept as-is to preserve the existing lookup behaviour */
  hints.ai_flags = AI_PASSIVE;
  hints.ai_socktype = SOCK_STREAM;

  if ((error = getaddrinfo(hostname, port, &hints, &aitop))) {
    uim_notify_fatal("uim-skk: %s", gai_strerror(error));
    return 0;
  }

  /* use the first address we can actually connect to */
  for (ai = aitop; ai; ai = ai->ai_next) {
    if (ai->ai_family != AF_INET && ai->ai_family != AF_INET6)
      continue;

    if ((sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
      continue;

    if (connect(sock, ai->ai_addr, ai->ai_addrlen) == 0)
      break;

    close(sock);
    sock = -1;
  }

  freeaddrinfo(aitop);

  if (sock == -1) {
    uim_notify_fatal(_("uim-skk: connect to %s port %s failed"), hostname, port);
    return 0;
  }

#if 0
  uim_notify_info("uim-skk: SKKSERVER=%s", hostname);
#endif

  /* Wrap the socket in stdio streams.  Publish nothing until both
   * streams exist, so that a failure cannot leave a NULL stream behind
   * a state that claims to be connected. */
  rd = fdopen(sock, "r");
  wr = rd ? fdopen(sock, "w") : NULL;
  if (!rd || !wr) {
    int saved_errno = errno;

    if (rd)
      fclose(rd);       /* closes sock as well */
    else
      close(sock);
    uim_notify_fatal("uim-skk: fdopen failed: %s", strerror(saved_errno));
    return 0;
  }

  skkservsock = sock;
  rserv = rd;
  wserv = wr;

  enable_completion =
    uim_scm_symbol_value_bool("skk-skkserv-enable-completion?") ?
    SKK_SERV_TRY_COMPLETION : 0;
  return SKK_SERV_CONNECTED | enable_completion;
}
3846
3847 static void
close_skkserv()3848 close_skkserv()
3849 {
3850 if (skkservsock >= 0) {
3851 fprintf(wserv, "0\n");
3852 fflush(wserv);
3853 close(skkservsock);
3854 skkservsock = -1;
3855 }
3856 }
3857
3858 static void
reset_is_used_flag_of_cache(dic_info * di)3859 reset_is_used_flag_of_cache(dic_info *di)
3860 {
3861 struct skk_line *sl;
3862 int i;
3863
3864 sl = di->head.next;
3865 while (sl) {
3866 for (i = 0; i < sl->nr_cand_array; i++) {
3867 struct skk_cand_array *ca = &sl->cands[i];
3868 ca->is_used = 0;
3869 }
3870 sl = sl->next;
3871 }
3872 }
3873
3874 static void
skkserv_disconnected(dic_info * di)3875 skkserv_disconnected(dic_info *di)
3876 {
3877 di->skkserv_state &= ~SKK_SERV_CONNECTED;
3878 reset_is_used_flag_of_cache(di);
3879 }
3880