1 /*
2  * Copyright (c) 2003 Nara Institute of Science and Technology
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *   notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name Nara Institute of Science and Technology may not be used to
15  *    endorse or promote products derived from this software without
16  *    specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY Nara Institute of Science and Technology
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
21  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE Nara Institute
22  * of Science and Technology BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
24  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * $Id: iotool.c,v 1.1.1.1 2007/03/13 07:40:10 masayu-a Exp $
31  */
32 
#include "config.h"

#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_WINDOWS_H
#include <windows.h>
#endif
#include "chadic.h"
#include "literal.h"
42 
43 #ifdef PATHTYPE_MSDOS
44 #define RCFILE "\\chasenrc"
45 #define RC2FILE "\\chasen2rc"
46 #else
47 #define RCFILE "/.chasenrc"
48 #define RC2FILE "/.chasen2rc"
49 #endif
50 
51 #if defined HAVE_WINDOWS_H && !defined __CYGWIN__
52 #define REG_PATH "Software\\NAIST\\ChaSen"
53 #define REG_RC "chasenrc"
54 #define REG_GRAMMAR "grammar"
55 #endif
56 
57 int Cha_lineno, Cha_lineno_error;
58 int Cha_errno = 0;
59 
60 static FILE *cha_stderr = NULL;
61 static char progpath[PATH_MAX] = "chasen";
62 static char filepath[PATH_MAX];
63 static char grammar_dir[PATH_MAX];
64 static char chasenrc_path[PATH_MAX];
65 
/*
 * cha_convert_escape - convert backslash escapes in place
 *
 * "\t" becomes a TAB and "\n" becomes a newline.  For any other "\c":
 *   ctrl_only != 0 : the sequence is kept unchanged ("\c")
 *   ctrl_only == 0 : the backslash is dropped ("c")
 * A lone backslash at the very end of the string is kept when ctrl_only
 * is set and dropped otherwise; scanning stops there so the terminator
 * is never skipped.
 *
 * inputs:
 *	str - writable, NUL-terminated string (the result is never longer)
 *
 * returns str.
 */
/* XXX: not Shift-JIS safe (bytes are examined one at a time) */
char *
cha_convert_escape(char *str, int ctrl_only)
{
    char *src, *dst;

    for (src = dst = str; *src; src++, dst++) {
	if (*src != '\\') {
	    *dst = *src;
	    continue;
	}

	/*
	 * Backslash is the last character: do not step over the
	 * terminating NUL.
	 */
	if (src[1] == '\0') {
	    if (ctrl_only)
		*dst++ = '\\';
	    break;
	}

	switch (*++src) {
	case 't':
	    *dst = '\t';
	    break;
	case 'n':
	    *dst = '\n';
	    break;
	default:
	    if (ctrl_only)
		*dst++ = '\\';
	    *dst = *src;
	    break;
	}
    }
    *dst = '\0';

    return str;
}
98 
99 /*
100  * cha_set_progpath - set program pathname
101  *
102  *	progpath is used in cha_exit() and cha_exit_file()
103  */
104 void
cha_set_progpath(char * path)105 cha_set_progpath(char *path)
106 {
107 #if defined _WIN32 && ! defined __CYGWIN__
108     GetModuleFileName(GetModuleHandle(NULL), progpath, PATH_MAX);
109 #else /* not _WIN32 */
110     strncpy(progpath, path, PATH_MAX);
111 #endif /* _WIN32 */
112 }
113 
114 /*
115  * cha_set_rcpath - set chasenrc file path
116  *
117  *	this function is called when -r option is used.
118  */
119 void
cha_set_rcpath(char * filename)120 cha_set_rcpath(char *filename)
121 {
122     strncpy(chasenrc_path, filename, PATH_MAX);
123 }
124 
125 /*
126  * cha_get_rcpath
127  *
128  *	called only from chasen.c
129  */
130 char *
cha_get_rcpath(void)131 cha_get_rcpath(void)
132 {
133     return chasenrc_path;
134 }
135 
136 /*
137  * cha_get_grammar_dir
138  *
139  *	called only from chasen.c
140  */
141 char *
cha_get_grammar_dir(void)142 cha_get_grammar_dir(void)
143 {
144     return grammar_dir;
145 }
146 
147 /*
148  * cha_fopen - open file, or error end
149  *
150  * inputs:
151  *	ret - exit code (don't exit if ret < 0)
152  */
153 FILE *
cha_fopen(char * filename,char * mode,int ret)154 cha_fopen(char *filename, char *mode, int ret)
155 {
156     FILE *fp;
157 
158     if (filename[0] == '-' && filename[1] == '\0')
159 	return stdin;
160 
161     if ((fp = fopen(filename, mode)) != NULL) {
162 	/*
163 	 * filepath is used in cha_exit_file()
164 	 */
165 	if (*mode == 'r') {
166 	    if (filename != filepath)
167 		strncpy(filepath, filename, PATH_MAX);
168 	    Cha_lineno = Cha_lineno_error = 0;
169 	}
170     } else if (ret >= 0)
171 	cha_exit_perror(filename);
172 
173     return fp;
174 }
175 
176 /*
177  * cha_fopen_grammar - open file from current or grammar directory
178  *
179  * inputs:
180  *	dir - 0: read from current directory
181  *	      1: read from grammar directory
182  *	      2: read from current directory or grammar directory
183  *
184  *	ret - return the code when fopen() fails
185  *
186  * outputs:
187  *	filepathp - file path string
188  */
189 FILE *
cha_fopen_grammar(char * filename,char * mode,int ret,int dir,char ** filepathp)190 cha_fopen_grammar(char *filename, char *mode, int ret, int dir,
191 		  char **filepathp)
192 {
193     FILE *fp;
194 
195     *filepathp = filename;
196     switch (dir) {
197     case 0:
198 	/*
199 	 * �����ȥǥ��쥯�ȥ꤫���ɤ߹���
200 	 */
201 	return cha_fopen(filename, mode, ret);
202     case 2:
203 	/*
204 	 * �����ȥǥ��쥯�ȥ꤫���ɤ߹���
205 	 */
206 	if ((fp = cha_fopen(filename, mode, -1)) != NULL)
207 	    return fp;
208 	/*
209 	 * FALLTHRU
210 	 */
211     default:			/* should be 1 */
212 	/*
213 	 * ʸˡ�ǥ��쥯�ȥ꤫���ɤ߹���
214 	 * ʸˡ�ǥ��쥯�ȥ꤬���ꤵ��Ƥ��ʤ���� .chasenrc ���ɤ߹���
215 	 */
216 	if (grammar_dir[0] == '\0')
217 	    cha_read_grammar_dir();
218 	snprintf(filepath, PATH_MAX, "%s%s", grammar_dir, filename);
219 	*filepathp = filepath;
220 	return cha_fopen(filepath, mode, ret);
221     }
222 }
223 
/*
 * cha_malloc - malloc() that reports failure through cha_exit_perror()
 *
 * returns the pointer obtained from malloc(n).
 */
void *
cha_malloc(size_t n)
{
    void *mem = malloc(n);

    if (mem != NULL)
	return mem;

    cha_exit_perror("malloc");
    return mem;
}
237 
/*
 * cha_realloc - realloc() that reports failure through cha_exit_perror()
 *
 * returns the pointer obtained from realloc(ptr, n).
 */
void *
cha_realloc(void *ptr, size_t n)
{
    void *mem = realloc(ptr, n);

    if (mem != NULL)
	return mem;

    cha_exit_perror("realloc");
    return mem;
}
248 
#define CHA_MALLOC_SIZE (1024 * 64)
/*
 * cha_malloc_char - hand out `size' bytes of character storage
 *
 * Small requests are carved sequentially out of chunks of
 * CHA_MALLOC_SIZE bytes obtained from cha_malloc(); a new chunk is
 * started when the current one cannot hold the request.  A request of
 * CHA_MALLOC_SIZE bytes or more can never fit into a chunk and gets an
 * allocation of its own.
 *
 * Nothing in this file releases the returned storage.
 */
static char *
cha_malloc_char(int size)
{
    static int idx = CHA_MALLOC_SIZE;	/* forces a chunk on first use */
    static char *ptr;

    /*
     * Oversized request: serving it from a chunk would run past the
     * end of the chunk.  The current chunk is left untouched.
     */
    if (size >= CHA_MALLOC_SIZE)
	return (char *) cha_malloc(size);

    if (idx + size >= CHA_MALLOC_SIZE) {
	ptr = (char *) cha_malloc(CHA_MALLOC_SIZE);
	idx = 0;
    }

    idx += size;
    return ptr + idx - size;
}
264 
/*
 * cha_strdup - copy a string into storage from cha_malloc_char()
 *
 * returns the copy.
 */
char *
cha_strdup(char *str)
{
    char *copy = cha_malloc_char(strlen(str) + 1);

    /* strcpy() returns its destination */
    return strcpy(copy, str);
}
275 
276 /*
277  * cha_exit() - print error messages on stderr and exit
278  */
279 void
cha_set_stderr(FILE * fp)280 cha_set_stderr(FILE * fp)
281 {
282     cha_stderr = fp;
283 }
284 
285 void
cha_exit(int status,char * format,...)286 cha_exit(int status, char *format, ...)
287 {
288     va_list ap;
289 
290     if (Cha_errno)
291 	return;
292 
293     if (!cha_stderr)
294 	cha_stderr = stderr;
295     else if (cha_stderr != stderr)
296 	fputs("500 ", cha_stderr);
297 
298     if (progpath)
299 	fprintf(cha_stderr, "%s: ", progpath);
300     va_start(ap, format);
301     vfprintf(cha_stderr, format, ap);
302     va_end(ap);
303     if (status >= 0) {
304 	fputc('\n', cha_stderr);
305 	if (cha_stderr == stderr)
306 	    exit(status);
307 	Cha_errno = 1;
308     }
309 }
310 
311 void
cha_exit_file(int status,char * format,...)312 cha_exit_file(int status, char *format, ...)
313 {
314     va_list ap;
315 
316     if (Cha_errno)
317 	return;
318 
319     if (!cha_stderr)
320 	cha_stderr = stderr;
321     else if (cha_stderr != stderr)
322 	fputs("500 ", cha_stderr);
323 
324     if (progpath)
325 	fprintf(cha_stderr, "%s: ", progpath);
326 
327     if (Cha_lineno == 0)
328 	;	/* do nothing */
329     else if (Cha_lineno == Cha_lineno_error)
330 	fprintf(cha_stderr, "%s:%d: ", filepath, Cha_lineno);
331     else
332 	fprintf(cha_stderr, "%s:%d-%d: ", filepath, Cha_lineno_error,
333 		Cha_lineno);
334 
335     va_start(ap, format);
336     vfprintf(cha_stderr, format, ap);
337     va_end(ap);
338 
339     if (status >= 0) {
340 	fputc('\n', cha_stderr);
341 	if (cha_stderr == stderr)
342 	    exit(status);
343 	Cha_errno = 1;
344     }
345 }
346 
/*
 * cha_perror - print the program prefix, then perror(s)
 *
 * The prefix is written by cha_exit(-1, "") on the configured error
 * stream; perror() itself always writes to stderr.
 */
void
cha_perror(char *s)
{
    /*
     * The stdio calls made by cha_exit() are allowed to change errno,
     * so keep the value that describes the failure being reported.
     */
    int saved_errno = errno;

    cha_exit(-1, "");
    errno = saved_errno;
    perror(s);
}
353 
/*
 * cha_exit_perror - report the current errno via cha_perror(s) and
 * terminate the process with exit status 1
 */
void
cha_exit_perror(char *s)
{
    cha_perror(s);

    /* unconditional, whatever the error stream is */
    exit(1);
}
360 
361 FILE *
cha_fopen_rcfile(void)362 cha_fopen_rcfile(void)
363 {
364     FILE *fp;
365     char *home_dir, *rc_env, *getenv();
366 
367     /*
368      * -R option (standard alone)
369      */
370     if (!strcmp(chasenrc_path, "*")) {
371 #if defined HAVE_WINDOWS_H && !defined __CYGWIN__
372 	if ((cha_read_registry(REG_PATH, REG_RC, chasenrc_path) != NULL) &&
373 	    ((fp = cha_fopen(chasenrc_path, "r", -1)) != NULL)) {
374 	    return fp;
375 	}
376 #endif
377 	strncpy(chasenrc_path, RCPATH, PATH_MAX);
378 	if ((fp = cha_fopen(chasenrc_path, "r", -1)) != NULL)
379 	    return fp;
380 	cha_exit(1, "can't open %s", chasenrc_path);
381     }
382 
383     /*
384      * -r option
385      */
386     if (chasenrc_path[0])
387 	return cha_fopen(chasenrc_path, "r", 1);
388 
389     /*
390      * environment variable CHASENRC
391      */
392     if ((rc_env = getenv("CHASENRC")) != NULL) {
393 	strncpy(chasenrc_path, rc_env, PATH_MAX);
394 	return cha_fopen(chasenrc_path, "r", 1);
395     }
396 
397     /*
398      * .chasenrc in the home directory
399      */
400     if ((home_dir = getenv("HOME")) != NULL) {
401 	/*
402 	 * .chasenrc
403 	 */
404 	snprintf(chasenrc_path, PATH_MAX, "%s%s", home_dir, RC2FILE);
405 	if ((fp = cha_fopen(chasenrc_path, "r", -1)) != NULL)
406 	    return fp;
407 	snprintf(chasenrc_path, PATH_MAX, "%s%s", home_dir, RCFILE);
408 	if ((fp = cha_fopen(chasenrc_path, "r", -1)) != NULL)
409 	    return fp;
410     }
411 #ifdef PATHTYPE_MSDOS
412     else if ((home_dir = getenv("HOMEDRIVE")) != NULL) {
413 	snprintf(chasenrc_path, PATH_MAX,
414 		 "%s%s%s", home_dir, getenv("HOMEPATH"), RC2FILE);
415 	if ((fp = cha_fopen(chasenrc_path, "r", -1)) != NULL)
416 	    return fp;
417 	snprintf(chasenrc_path, PATH_MAX,
418 		 "%s%s%s", home_dir, getenv("HOMEPATH"), RCFILE);
419 	if ((fp = cha_fopen(chasenrc_path, "r", -1)) != NULL)
420 	    return fp;
421     }
422 #endif /* PATHTYPE_MSDOS */
423 
424 #if defined HAVE_WINDOWS_H && !defined __CYGWIN__
425     if ((cha_read_registry(REG_PATH, REG_RC, chasenrc_path) != NULL) &&
426 	((fp = cha_fopen(chasenrc_path, "r", -1)) != NULL)) {
427 	return fp;
428     }
429 #endif
430     strncpy(chasenrc_path, RCPATH, PATH_MAX);
431 
432     if ((fp = cha_fopen(chasenrc_path, "r", -1)) != NULL)
433 	return fp;
434 
435     cha_exit(1, "can't open chasenrc or %s", chasenrc_path);
436 
437     /*
438      * to avoid warning
439      */
440     return NULL;
441 }
442 
443 static void
add_delimiter(char * string)444 add_delimiter(char *string)
445 {
446     char *s = string + strlen(string);
447 
448     if (s[-1] != PATH_DELIMITER) {
449 	s[0] = PATH_DELIMITER;
450 	s[1] = '\0';
451     }
452 }
453 
454 /*
455  * read .chasenrc and set grammar directory
456  */
457 void
cha_read_grammar_dir(void)458 cha_read_grammar_dir(void)
459 {
460     FILE *fp;
461     chasen_cell_t *cell;
462 
463     fp = cha_fopen_rcfile();
464 
465     while (!cha_s_feof(fp)) {
466 	char *s;
467 	cell = cha_s_read(fp);
468 	s = cha_s_atom(cha_car(cell));
469 	if (cha_litmatch(s, 1, STR_GRAM_FILE)) {
470 	    strncpy(grammar_dir, cha_s_atom(cha_car(cha_cdr(cell))), PATH_MAX);
471 	    add_delimiter(grammar_dir);
472 	    break;
473 	}
474     }
475 
476     if (grammar_dir[0] == '\0') {
477 	char *s;
478 
479 #if defined HAVE_WINDOWS_H && !defined __CYGWIN__
480 	if (cha_read_registry(REG_PATH, REG_GRAMMAR,
481 			      grammar_dir) != NULL) {
482 	    if (grammar_dir[0] != '\0')
483 		add_delimiter(grammar_dir);
484 	} else {
485 #endif
486 	strncpy(grammar_dir, chasenrc_path, PATH_MAX);
487 	if ((s = strrchr(grammar_dir, PATH_DELIMITER)) != NULL)
488 	    s[1] = '\0';
489 	else
490 	    grammar_dir[0] = '\0';
491 #if defined HAVE_WINDOWS_H && !defined __CYGWIN__
492 	}
493 #endif
494     }
495 
496     fclose(fp);
497 }
498 
/*
 * cha_read_registry - read a value from HKEY_CURRENT_USER
 *
 * inputs:
 *	path - registry key below HKEY_CURRENT_USER
 *	name - value name inside that key
 *
 * outputs:
 *	val - receives the value, NUL-terminated; must have room for
 *	      PATH_MAX bytes
 *
 * returns val on success, NULL when the key or value cannot be read.
 * Builds without native Windows support always return NULL and leave
 * val untouched.
 */
char *
cha_read_registry(char *path, char *name, char *val)
{
#if defined HAVE_WINDOWS_H && !defined __CYGWIN__
    HKEY hKey;
    DWORD size = PATH_MAX - 1;	/* keep one byte for the terminator */
    LONG status;

    if (RegOpenKeyEx(HKEY_CURRENT_USER, path, 0,
		     KEY_QUERY_VALUE, &hKey) != ERROR_SUCCESS)
	return NULL;

    status = RegQueryValueEx(hKey, name, NULL, NULL, (LPBYTE) val, &size);
    /* the key was opened, so close it whether or not the query worked */
    RegCloseKey(hKey);

    if (status != ERROR_SUCCESS)
	return NULL;

    /* stored strings are not guaranteed to include a terminator */
    val[size] = '\0';

    return val;
#else
    (void) path;
    (void) name;
    (void) val;

    return NULL;
#endif
}
519