1 /*
2 *
3 * wakati.c -
4 *
5 * $Id: wakati.c,v 1.26.8.6 2007-12-05 16:50:47 opengl2772 Exp $
6 *
7 * Copyright (C) 1997-1999 Satoru Takabayashi All rights reserved.
8 * Copyright (C) 2000,2001,2003,2007 Namazu Project All rights reserved.
9 * This is free software with ABSOLUTELY NO WARRANTY.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24 * 02111-1307, USA
25 *
26 *
27 */
28
29 #ifdef HAVE_CONFIG_H
30 # include "config.h"
31 #endif
32
33 #include <stdio.h>
34 #include <ctype.h>
35
36 #ifdef HAVE_STRING_H
37 # include <string.h>
38 #else
39 # include <strings.h>
40 #endif
41
42 #include "libnamazu.h"
43 #include "util.h"
44 #include "search.h"
45 #include "wakati.h"
46
47 /*
48 *
49 * Macros
50 *
51 */
52
/*
 * iskanji(c): non-zero when both the byte at c and the byte following it
 * satisfy nmz_iseuc().  The second byte is only examined when the first one
 * passes.  NOTE: the argument is expanded twice, so it must not have side
 * effects.
 */
#define iskanji(c)  (nmz_iseuc(*(c)) && nmz_iseuc(*((c) + 1)))
54
55
56 /*
57 *
58 * Private functions
59 *
60 */
61
/* Forward declarations of the file-local character classifiers defined below. */
static int detect_char_type(char *c);
static int iskatakana(const char *chr);
static int ishiragana(const char *chr);
65
66 static int
detect_char_type(char * c)67 detect_char_type(char *c)
68 {
69 if (iskatakana(c)) {
70 return KATAKANA;
71 } else if (ishiragana(c)){
72 return HIRAGANA;
73 } else if (iskanji(c)) {
74 return KANJI;
75 }
76 return OTHER;
77 }
78
/*
 * Return 1 when the two bytes at chr are treated as katakana, 0 otherwise.
 *
 * Two byte patterns qualify:
 *   - 0xa5 followed by any byte in 0xa0..0xff
 *   - 0xa1 0xbc (the "choon" mark)
 *
 * The second byte is only read after the first byte has matched, so a
 * NUL-terminated string is never read past its terminator.
 */
static int
iskatakana(const char *chr)
{
    /* View the bytes as unsigned so values >= 0x80 compare as expected,
       without discarding the const qualifier. */
    const unsigned char *c = (const unsigned char *)chr;

    if (c[0] == 0xa5 && c[1] >= 0xa0) {   /* 0xa0 <= c[1] <= 0xff */
        return 1;
    }
    if (c[0] == 0xa1 && c[1] == 0xbc) {   /* choon */
        return 1;
    }
    return 0;
}
97
/*
 * Return 1 when the two bytes at chr are treated as hiragana, 0 otherwise.
 *
 * Two byte patterns qualify:
 *   - 0xa4 followed by any byte in 0xa0..0xff
 *   - 0xa1 0xbc (the "choon" mark)
 *
 * The second byte is only read after the first byte has matched, so a
 * NUL-terminated string is never read past its terminator.
 */
static int
ishiragana(const char *chr)
{
    /* View the bytes as unsigned so values >= 0x80 compare as expected,
       without discarding the const qualifier. */
    const unsigned char *c = (const unsigned char *)chr;

    if (c[0] == 0xa4 && c[1] >= 0xa0) {   /* 0xa0 <= c[1] <= 0xff */
        return 1;
    }
    if (c[0] == 0xa1 && c[1] == 0xbc) {   /* choon */
        return 1;
    }
    return 0;
}
115
116
117 /*
118 *
119 * Public functions
120 *
121 */
122
123 int
nmz_wakati(char * key)124 nmz_wakati(char *key)
125 {
126 int i, j, key_leng, type;
127 char buf[BUFSIZE * 2] = "";
128
129 nmz_debug_printf("wakati original: [%s].\n", key);
130
131 for (i = 0; i < (int)strlen(key); ) {
132 type = detect_char_type(key + i);
133 if (nmz_iseuc(*(key + i))) {
134 key_leng = 0;
135 for (j = 0; iskanji(key + i + j) ; j += 2) {
136 char tmp[BUFSIZE];
137
138 if (j == 0 && (iskatakana(key + i + j) ||
139 ishiragana(key + i + j)))
140 {
141 /* If beggining character is Katakana or Hiragana */
142 break;
143 }
144
145 strncpy(tmp, key + i, j + 2);
146 *(tmp + j + 2) = '\0';
147
148 if (nmz_binsearch(tmp, 0) != -1) {
149 key_leng = j + 2;
150 }
151 }
152
153 if (key_leng > 0) {
154 strncat(buf, key + i, key_leng);
155 strcat(buf, "\t");
156 i += key_leng;
157 } else {
158 if (type == HIRAGANA || type == KATAKANA) {
159 for (j =0; ; j += 2) {
160 if (!((type == HIRAGANA && ishiragana(key + i + j))
161 ||(type == KATAKANA && iskatakana(key + i + j))))
162 {
163 break;
164 }
165 strncat(buf, key + i + j, 2);
166 }
167 i += j;
168 strcat(buf, "\t");
169 } else {
170 strncat(buf, key + i, 2);
171 strcat(buf, "\t");
172 i += 2;
173 }
174 }
175 } else {
176 while(*(key + i) && !nmz_iseuc(*(key + i))) {
177 /* As an initial attempt always success,
178 outer 'for loop' can avoid infinite loop */
179 if (*(key + i) == '\t') {
180 nmz_chomp(buf);
181 }
182 strncat(buf, key + i, 1);
183 i++;
184 }
185 nmz_chomp(buf);
186 strcat(buf, "\t");
187 }
188 }
189 nmz_chomp(buf);
190
191 if (strlen(buf) <= BUFSIZE) {
192 strcpy(key, buf);
193 } else {
194 nmz_set_dyingmsg(nmz_msg("wakatigaki processing failed.\n"));
195 return 1;
196 }
197 nmz_debug_printf("wakatied string: [%s]\n", key);
198 return 0;
199 }
200
201
202