1 /*
2  *
3  * $Id: util.c,v 1.75.4.20 2008-07-10 19:46:45 opengl2772 Exp $
4  *
5  * Copyright (C) 1997-1999 Satoru Takabayashi All rights reserved.
6  * Copyright (C) 2000-2008 Namazu Project All rights reserved.
7  * This is free software with ABSOLUTELY NO WARRANTY.
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22  * 02111-1307, USA
23  *
24  * nmz_scan_hex(), nmz_scan_oct(), nmz_xmalloc(), nmz_xrealloc() are
25  * originally imported from Ruby b19's "util.c" and "gc.c".
26  * Thanks to Mr. Yukihiro Matsumoto <matz@netlab.co.jp>.
27  *
28  */
29 
30 #ifdef HAVE_CONFIG_H
31 #  include "config.h"
32 #endif
33 #ifdef HAVE_SUPPORT_H
34 #  include "support.h"
35 #endif
36 
37 #include <assert.h>
38 #include <stdio.h>
39 #ifdef HAVE_STDLIB_H
40 #  include <stdlib.h>
41 #endif
42 #include <ctype.h>
43 #include <stdarg.h>
44 
45 #ifdef HAVE_ERRNO_H
46 #  include <errno.h>
47 #endif
48 
49 #ifdef __EMX__
50 #include <sys/types.h>
51 #endif
52 #include <sys/stat.h>
53 
54 #ifdef HAVE_STRING_H
55 #  include <string.h>
56 #else
57 #  include <strings.h>
58 #endif
59 
60 #include "libnamazu.h"
61 #include "util.h"
62 #include "i18n.h"
63 #include "var.h"
64 #include "system.h"
65 
66 /*
67  *
68  * Private functions
69  *
70  */
71 
72 static void reverse_byte_order (void*, size_t, size_t);
73 static char decode_uri_sub(char c1, char c2);
74 
/*
 * Reverse the byte order of every element of an array, in place.
 * It is type independent: the array is handled as raw bytes.
 *
 *   p    - start of the array
 *   n    - number of elements in the array
 *   size - size of one element in bytes
 *
 * Elements of size 0 or 1 have nothing to swap and are left untouched.
 */
static void
reverse_byte_order(void *p, size_t n, size_t size)
{
    unsigned char *elem = (unsigned char *)p;
    size_t i;

    if (size < 2) {
        return;
    }

    /*
     * Counters are size_t: narrowing n or size to int would truncate
     * large values.
     */
    for (i = 0; i < n; i++, elem += size) {
        unsigned char *lo = elem;
        unsigned char *hi = elem + size - 1;

        /* Swap the outermost bytes and walk towards the middle. */
        while (lo < hi) {
            unsigned char tmp = *lo;

            *lo = *hi;
            *hi = tmp;
            lo++;
            hi--;
        }
    }
}
94 
/*
 * Combine two hexadecimal digit characters into a single byte.
 * c1 supplies the high nibble and c2 the low nibble.
 *
 * A character at or above 'A' is upper-cased and read as a letter digit
 * ('A' == 10); any other character is read as a decimal digit.
 * No validation is done here: the caller must pass hexadecimal digits.
 */
static char
decode_uri_sub(char c1, char c2)
{
    int high, low;
    char decoded;

    if (c1 >= 'A') {
        high = (_nmz_toupper((unsigned char)c1) - 'A') + 10;
    } else {
        high = c1 - '0';
    }

    if (c2 >= 'A') {
        low = (_nmz_toupper((unsigned char)c2) - 'A') + 10;
    } else {
        low = c2 - '0';
    }

    decoded = high * 16;
    decoded += low;
    return decoded;
}
104 
105 /*
106  *
107  * Public functions
108  *
109  */
110 
/*
 * Parse an octal number at the beginning of start.
 *
 * Scanning stops after len characters or at the first character that
 * is not an octal digit, whichever comes first.
 *
 *   start  - text to scan
 *   len    - maximum number of characters to consume
 *   retlen - receives the number of characters actually consumed
 *
 * Returns the parsed value (0 when no digit was consumed).
 */
unsigned long
nmz_scan_oct(const char *start, int len, int *retlen)
{
    const char *cur = start;
    unsigned long value = 0;

    for (; len != 0; len--) {
        if (*cur < '0' || *cur > '7') {
            break;
        }
        value = (value << 3) | (unsigned long)(*cur - '0');
        cur++;
    }

    *retlen = (int)(cur - start);
    return value;
}
125 
/*
 * Parse a hexadecimal number at the beginning of start.
 *
 * Scanning stops after len characters, at the end of the string, or at
 * the first character missing from the digit table, whichever comes
 * first. Both lower and upper case digits are accepted. The table also
 * holds 'x', which contributes the value 0.
 *
 *   start  - text to scan
 *   len    - maximum number of characters to consume
 *   retlen - receives the number of characters actually consumed
 *
 * Returns the parsed value (0 when no digit was consumed).
 */
unsigned long
nmz_scan_hex(const char *start, int len, int *retlen)
{
    /* The table index masked with 15 is the value of the digit. */
    static const char digits[] = "0123456789abcdef0123456789ABCDEFx";
    const char *cur = start;
    unsigned long value = 0;

    for (; len != 0 && *cur != '\0'; len--, cur++) {
        const char *hit = strchr(digits, *cur);

        if (hit == NULL) {
            break;
        }
        value = (value << 4) | (unsigned long)((hit - digits) & 15);
    }

    *retlen = (int)(cur - start);
    return value;
}
142 
143 
/* Running total of the byte counts requested through nmz_xmalloc(). */
static unsigned long malloc_memories = 0;

/*
 * malloc() wrapper that never requests zero bytes: a size of 0 is
 * turned into a request for 1 byte.
 *
 * Returns whatever malloc() returns, which is NULL on failure.
 */
void *
nmz_xmalloc(unsigned long size)
{
    unsigned long request = (size == 0) ? 1 : size;

    malloc_memories += request;
    return malloc(request);
}
157 
/*
 * realloc() wrapper.
 *
 * A NULL ptr is forwarded to nmz_xmalloc(). A size of 0 is turned into
 * a request for 1 byte, as nmz_xmalloc() does, because the result of
 * realloc(ptr, 0) is not portable (it may free ptr and return NULL).
 *
 * Returns the resized block, or NULL on failure; in that case the
 * original block is still valid.
 */
void *
nmz_xrealloc(void *ptr, unsigned long size)
{
    if (ptr == NULL) {
        return nmz_xmalloc(size);
    }
    if (size == 0) {
        size = 1;
    }
    return realloc(ptr, size);
}
167 
168 
169 
/*
 * Translate characters of str in place.
 *
 * Every character of str that occurs in lstr is replaced by the
 * character at the same position in rstr. rstr is indexed without a
 * length check, so it must be at least as long as lstr.
 */
void
nmz_tr(char *str, const char *lstr, const char *rstr)
{
    char *cur;

    for (cur = str; *cur != '\0'; cur++) {
        const char *hit = strchr(lstr, *cur);

        if (hit == NULL) {
            continue;  /* not in the source set, keep as is */
        }
        *cur = rstr[hit - lstr];
    }
}
181 
/*
 * Delete trailing white space (newline, carriage return, space and
 * tab) from str, in place.
 *
 * The string is walked with a length index rather than a pointer, so
 * that no pointer before the start of str is ever formed, even for an
 * empty or all-blank string.
 */
void
nmz_chomp(char *str)
{
    size_t len = strlen(str);

    while (len > 0) {
        char c = str[len - 1];

        if (c != '\n' && c != '\r' && c != ' ' && c != '\t') {
            break;
        }
        str[--len] = '\0';
    }
}
198 
199 
/*
 * Do fread with endian consideration.
 *
 * Same arguments and return value as fread(). Unless WORDS_BIGENDIAN
 * is defined, the byte order of every element that was read is
 * reversed. Only the elements actually read are swapped: after a short
 * read the rest of the buffer holds no file data and is left untouched.
 *
 * FIXME: the byte order is chosen at compile time through
 * WORDS_BIGENDIAN. Please tell us if you know a better way.
 */
size_t
nmz_fread(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
    size_t nread;

    nread = fread(ptr, size, nmemb, stream);
#ifndef WORDS_BIGENDIAN
    reverse_byte_order(ptr, nread, size);
#endif
    return nread;
}
217 
/*
 * Read one variable-length integer from fp.
 *
 * The value is stored in 7-bit groups, most significant group first.
 * Every byte except the last has its high bit set.
 *
 *   fp   - stream to read from
 *   data - receives the decoded value
 *
 * Returns the number of bytes consumed, or 0 when EOF is reached
 * before the value is complete (*data is not written in that case).
 */
int
nmz_get_unpackw(FILE *fp, int *data)
{
    int value = 0;
    int nbytes;

    for (nbytes = 1; ; nbytes++) {
        int byte = getc(fp);

        if (byte == EOF) {
            return 0;
        }
        if (byte < 128) {
            /* High bit clear: this is the final group. */
            *data = value + byte;
            return nbytes;
        }
        /* Continuation byte: add its 7 bits and make room for more. */
        value = (value + (byte & 0x7f)) << 7;
    }
}
240 
/*
 * Read consecutive variable-length integers from fp into buf until at
 * least size bytes have been consumed or a read fails.
 *
 *   fp   - stream to read from
 *   buf  - receives the decoded values; the number of slots is not
 *          checked here, so the caller must provide enough room
 *   size - number of encoded bytes to consume
 *
 * Returns the number of slots of buf that were handed to
 * nmz_get_unpackw(). Note that a slot whose read failed is counted too.
 */
int
nmz_read_unpackw(FILE *fp, int *buf, int size)
{
    int consumed = 0;
    int count = 0;

    while (consumed < size) {
        int len = nmz_get_unpackw(fp, buf + count);

        count++;
        if (len == 0) {  /* Error */
            break;
        }
        consumed += len;
    }
    return count;
}
256 
/*
 * Read an index and return its value which is a pointer to another file.
 *
 *   fp    - index file made of int sized records
 *   point - number of the record to read
 *
 * Returns the value of the record, or -1 when the seek or the read
 * fails, so that an indeterminate value is never returned.
 */
long
nmz_getidxptr(FILE *fp, long point)
{
    int val;

    if (fseek(fp, point * (long)sizeof(int), SEEK_SET) != 0) {
        return -1;
    }
    if (nmz_fread(&val, sizeof(int), 1, fp) != 1) {
        return -1;
    }
    return (long)val;
}
269 
270 /*
271  * Warning messaging function.
272  */
273 void
nmz_warn_printf(const char * fmt,...)274 nmz_warn_printf(const char *fmt, ...)
275 {
276     va_list args;
277     FILE *fp;
278 
279     if (!nmz_is_loggingmode())
280 	return;
281     if (nmz_is_output_warn_to_file()) {
282 	fp = fopen(NMZ.warnlog, "a+");
283 	if (fp == NULL) {
284 	    fprintf(stderr, "Can't open NMZ.warnlog.\n");
285 	    return;
286 	}
287     } else {
288 	fp = stderr;
289     }
290 
291     fflush(fp);
292 
293     fprintf(fp, "%s: ", PACKAGE);
294 
295     va_start(args, fmt);
296     vfprintf(fp, fmt, args);
297     va_end(args);
298 
299     if (fmt[strlen(fmt) - 1] != '\n') {
300 	fprintf(fp, "\n");
301     }
302 
303     fflush(fp);
304 
305     if (fp != stderr)
306 	fclose(fp);
307 }
308 
309 /*
310  * Debug messaging function.
311  */
312 void
nmz_debug_printf(const char * fmt,...)313 nmz_debug_printf(const char *fmt, ...)
314 {
315     va_list args;
316 
317     if (!nmz_is_debugmode()) {
318 	return;
319     }
320 
321     fflush(stdout);
322 
323     fprintf(stderr, "%s(debug): ", PACKAGE);
324 
325     va_start(args, fmt);
326     vfprintf(stderr, fmt, args);
327     va_end(args);
328 
329     if (fmt[strlen(fmt) - 1] != '\n') {
330 	fprintf(stderr, "\n");
331     }
332 
333     fflush(stderr);
334 }
335 
336 void
nmz_pathcat(const char * base,char * name)337 nmz_pathcat(const char *base, char *name)
338 {
339     char work[BUFSIZE];
340     int i;
341     int win32 = 0;
342 #if  defined(_WIN32) || defined(__EMX__)
343     win32 = 1;
344 #endif
345 
346     for (i = (int)strlen(name) - 1; i >= 0; i--) {
347         if (name[i] == '/' || (win32 && name[i] == '\\')) {
348             strcpy(name, name + i + 1);
349             break;
350         }
351     }
352     strncpy(work, base, BUFSIZE - 1);
353     work[BUFSIZE - 1] = '\0';
354     strncat(work, "/", BUFSIZE - strlen(work) - 1);
355     strncat(work, name, BUFSIZE - strlen(work) - 1);
356     strncpy(name, work, BUFSIZE - 1);
357 }
358 
/*
 * Tell whether str consists of digits only.
 *
 * Returns 1 when every character satisfies nmz_isdigit() and the
 * string is at most 10 characters long (an empty string qualifies),
 * and 0 otherwise.
 */
int
nmz_isnumstr(const char *str)
{
    const unsigned char *cur = (const unsigned char *)str;

    if (strlen(str) > 10) {  /* Too large number */
        return 0;
    }

    while (*cur != '\0') {
        if (!nmz_isdigit((int)*cur)) {
            return 0;
        }
        cur++;
    }

    return 1;
}
376 
/*
 * Substitute for tolower(3).
 *
 * Only the letters 'A' to 'Z' are converted; any other value is
 * returned unchanged.
 */
int
_nmz_tolower(int c)
{
    return (c >= 'A' && c <= 'Z') ? c - 'A' + 'a' : c;
}
390 
/*
 * Substitute for toupper(3).
 *
 * Only the letters 'a' to 'z' are converted; any other value is
 * returned unchanged.
 */
int
_nmz_toupper(int c)
{
    return (c >= 'a' && c <= 'z') ? c - 'a' + 'A' : c;
}
400 
/*
 * Convert str to lower case, in place.
 */
void
nmz_strlower(char *str)
{
    char *cur;

    for (cur = str; *cur != '\0'; cur++) {
        /* Using ascii dependent lower same as mknmz.  */
        *cur = _nmz_tolower(*cur);
    }
}
410 
/*
 * Compare str1 and str2 over the length of the shorter of the two,
 * ignoring case.
 *
 * Returns 0 when one string is a prefix of the other, otherwise the
 * non-zero result of strncasecmp().
 */
int
nmz_strprefixcasecmp(const char *str1, const char *str2)
{
    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);
    size_t shorter = (len1 > len2) ? len2 : len1;

    return strncasecmp(str1, str2, shorter);
}
425 
/*
 * Compare str1 and str2 over the length of the shorter of the two.
 *
 * Returns 0 when one string is a prefix of the other, otherwise the
 * non-zero result of strncmp().
 */
int
nmz_strprefixcmp(const char *str1, const char *str2)
{
    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);
    size_t shorter = (len1 > len2) ? len2 : len1;

    return strncmp(str1, str2, shorter);
}
440 
/*
 * Compare the tails of str1 and str2 over the length of the shorter
 * of the two.
 *
 * Returns 0 when one string is a suffix of the other, otherwise the
 * non-zero result of strcmp() on the aligned tails.
 */
int
nmz_strsuffixcmp(const char *str1, const char *str2)
{
    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);

    if (len1 <= len2) {
        /* str2 is the longer one (or equal): skip its head. */
        return strcmp(str1, str2 + (len2 - len1));
    }
    return strcmp(str1 + (len1 - len2), str2);
}
455 
/*
 * Load the whole of file.
 *
 *   fname - name of the file to read
 *
 * Returns a malloc()ed, NUL terminated buffer that holds the content of
 * the file; the caller must free() it. Returns NULL on failure:
 *   - a warning is printed when the file cannot be examined or opened;
 *   - the dying message is set when the allocation or the read fails.
 *
 * The terminator is stored right after the bytes actually read, so the
 * result never exposes uninitialized memory after a short read.
 */
char *
nmz_readfile(const char *fname)
{
    char *buf;
    FILE *fp;
    struct stat fstatus;
    size_t fsize;
    size_t nread = 0;

    errno = 0; /* errno must be initialized. */

    /* Without a successful stat() the size below would be garbage. */
    if (stat(fname, &fstatus) != 0) {
        nmz_warn_printf("%s: %s", fname, strerror(errno));
        return NULL;
    }
    fp = fopen(fname, "rb");
    if (fp == NULL) {
        nmz_warn_printf("%s: %s", fname, strerror(errno));
        return NULL;
    }
    fsize = (size_t)fstatus.st_size;
    buf = malloc(fsize + 1);
    if (buf == NULL) {
        nmz_set_dyingmsg(nmz_msg("%s: %s", fname, strerror(errno)));
        fclose(fp);
        return NULL;
    }
    if (fsize != 0) {
        nread = fread(buf, sizeof(char), fsize, fp);
        if (nread == 0) {
            nmz_set_dyingmsg(nmz_msg("%s: %s", fname, strerror(errno)));
            free(buf);
            fclose(fp);
            return NULL;
        }
    }
    buf[nread] = '\0';
    fclose(fp);
    return buf;
}
492 
/*
 * Safe version of getenv.
 *
 * Returns the value of the environment variable s, or an empty string
 * (never NULL) when the variable is not set.
 */
char *
nmz_getenv(const char *s)
{
    char *value = getenv(s);

    if (value == NULL) {
        return "";
    }
    return value;
}
502 
/*
 * Decoding URI encoded strings
 *
 * str is decoded in place (the result is never longer than the input):
 *   - "%XX", where XX are two hexadecimal digits, becomes the byte
 *     with that value;
 *   - '+' becomes a space;
 *   - everything else is copied as is, including a '%' that is not
 *     followed by two hexadecimal digits.
 */
void
nmz_decode_uri(char *str)
{
    int i, j;

    for (i = j = 0; str[i]; i++, j++) {
        /*
         * isxdigit() is false for the terminator, so the tests below
         * never look past the end of the string.
         */
        if (str[i] == '%'
            && isxdigit((unsigned char)str[i + 1])
            && isxdigit((unsigned char)str[i + 2]))
        {
            str[j] = decode_uri_sub(str[i + 1], str[i + 2]);
            i += 2;
        } else if (str[i] == '+') {
            str[j] = ' ';
        } else {
            /* Also taken by a malformed escape: keep the '%' itself. */
            str[j] = str[i];
        }
    }
    str[j] = '\0';
}
524 
525 /*
526  * Returns a string describing the libnmz error code passed
527  * in the argument errnum just like strerror().
528  */
529 char *
nmz_strerror(enum nmz_stat errnum)530 nmz_strerror(enum nmz_stat errnum)
531 {
532     char *msg = NULL;
533 
534     switch (errnum) {
535     case ERR_FATAL:
536 	msg = _("Fatal error occurred!");
537 	break;
538     case ERR_TOO_LONG_QUERY:
539         msg = _("Too long query");
540 	break;
541     case ERR_INVALID_QUERY:
542 	msg = _("Invalid query");
543 	break;
544     case ERR_TOO_MANY_TOKENS:
545 	msg = _("Too many query tokens");
546 	break;
547     case  ERR_TOO_MUCH_MATCH:
548 	msg = _("Too many words matched. Ignored");
549 	break;
550     case ERR_TOO_MUCH_HIT:
551 	msg = _("Too many documents hit. Ignored");
552 	break;
553     case ERR_REGEX_SEARCH_FAILED:
554 	msg = _("can't open the regex index");
555 	break;
556     case ERR_PHRASE_SEARCH_FAILED:
557 	msg = _("can't open the phrase index");
558 	break;
559     case ERR_FIELD_SEARCH_FAILED:
560 	msg = _("can't open the field index");
561 	break;
562     case ERR_CANNOT_OPEN_INDEX:
563 	msg = _("can't open the index");
564 	break;
565     case ERR_CANNOT_OPEN_RESULT_FORMAT_FILE:
566 	msg = _("can't open the result format file");
567 	break;
568     case ERR_NO_PERMISSION:
569 	msg = _("You don't have a permission to access the index");
570 	break;
571     case ERR_INDEX_IS_LOCKED:
572 	msg = _("The index is locked for maintenance");
573 	break;
574     case ERR_OLD_INDEX_FORMAT:
575 	msg = _("Present index is old type. it's unsupported.");
576 	break;
577     default:
578 	msg = _("Unknown error. Report bug!");
579 	break;
580     }
581 
582     assert(msg != NULL);
583 
584     return msg;
585 }
586 
/*
 * Check whether the file fname exists or not.
 *
 * Returns 1 when stat() succeeds on fname, and 0 otherwise.
 */
int
nmz_is_file_exists(const char *fname)
{
    struct stat st;

    if (stat(fname, &st) != 0) {
        return 0;
    }
    return 1;
}
597 
/*
 *   for directory traversal issue.
 *   Must be encoded in EUC-JP encoding.
 *
 *   Copy at most n - 1 bytes of src to dest, then cut dest at the first
 *   path delimiter ('/' or '\\'), so that only the leading component
 *   is kept.
 *
 *     dest - destination buffer of at least n bytes
 *     src  - string to copy
 *     n    - size of dest; when it is 0, dest is not touched
 *
 *   Returns dest.
 */
char *
nmz_delete_since_path_delimitation(char *dest, const char *src, size_t n)
{
    if (n < 1) {
        return dest;
    }

    strncpy(dest, src, n - 1);
    dest[n - 1] = '\0';

    /*
     * strcspn() gives the offset of the first delimiter, or of the
     * terminator when there is none; a terminator goes there either way.
     */
    dest[strcspn(dest, "/\\")] = '\0';

    return dest;
}
625