1 /*
2 *
3 * $Id: util.c,v 1.75.4.20 2008-07-10 19:46:45 opengl2772 Exp $
4 *
5 * Copyright (C) 1997-1999 Satoru Takabayashi All rights reserved.
6 * Copyright (C) 2000-2008 Namazu Project All rights reserved.
7 * This is free software with ABSOLUTELY NO WARRANTY.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 * 02111-1307, USA
23 *
24 * nmz_scan_hex(), nmz_scan_oct(), nmz_xmalloc(), nmz_xrealloc() are
25 * originally imported from Ruby b19's "util.c" and "gc.c".
26 * Thanks to Mr. Yukihiro Matsumoto <matz@netlab.co.jp>.
27 *
28 */
29
30 #ifdef HAVE_CONFIG_H
31 # include "config.h"
32 #endif
33 #ifdef HAVE_SUPPORT_H
34 # include "support.h"
35 #endif
36
37 #include <assert.h>
38 #include <stdio.h>
39 #ifdef HAVE_STDLIB_H
40 # include <stdlib.h>
41 #endif
42 #include <ctype.h>
43 #include <stdarg.h>
44
45 #ifdef HAVE_ERRNO_H
46 # include <errno.h>
47 #endif
48
49 #ifdef __EMX__
50 #include <sys/types.h>
51 #endif
52 #include <sys/stat.h>
53
54 #ifdef HAVE_STRING_H
55 # include <string.h>
56 #else
57 # include <strings.h>
58 #endif
59
60 #include "libnamazu.h"
61 #include "util.h"
62 #include "i18n.h"
63 #include "var.h"
64 #include "system.h"
65
66 /*
67 *
68 * Private functions
69 *
70 */
71
/* Forward declarations of the file-local helpers defined below. */
static void reverse_byte_order(void *p, size_t n, size_t size);
static char decode_uri_sub(char c1, char c2);
74
/*
 * Reverse the byte order of every element of an array, in place.
 * It's type independent.
 *
 *   p    - start of the array
 *   n    - number of elements in the array
 *   size - size of one element in bytes
 *
 * Elements whose size is 0 or 1 are left unchanged.
 */
static void
reverse_byte_order(void *p, size_t n, size_t size)
{
    unsigned char *elem = (unsigned char *)p;
    size_t i, j;

    /*
     * The counters are size_t so that large element counts or sizes
     * are not truncated (the counts arrive as size_t).
     */
    for (i = 0; i < n; i++, elem += size) {
        for (j = 0; j < size / 2; j++) {
            unsigned char tmp = elem[j];

            elem[j] = elem[size - 1 - j];
            elem[size - 1 - j] = tmp;
        }
    }
}
94
/*
 * Combine two characters of a "%XY" escape into one byte:
 * c1 supplies the high nibble and c2 the low nibble.
 *
 * A character >= 'A' is upper-cased and mapped as 'A' -> 10, 'B' -> 11, ...;
 * any other character is mapped as its distance from '0'.  No check is
 * made that the characters really are hexadecimal digits.
 */
static char
decode_uri_sub(char c1, char c2)
{
    int hi, lo;
    char decoded;

    if (c1 >= 'A') {
        hi = (_nmz_toupper((unsigned char)c1) - 'A') + 10;
    } else {
        hi = c1 - '0';
    }

    if (c2 >= 'A') {
        lo = (_nmz_toupper((unsigned char)c2) - 'A') + 10;
    } else {
        lo = c2 - '0';
    }

    decoded = hi * 16;
    decoded += lo;
    return decoded;
}
104
105 /*
106 *
107 * Public functions
108 *
109 */
110
/*
 * Parse an octal number from the beginning of start.
 *
 * At most len characters are examined; scanning stops at the first
 * character outside '0'..'7'.  The number of characters consumed is
 * stored in *retlen and the accumulated value is returned (0 when no
 * digit was consumed).
 */
unsigned long
nmz_scan_oct(const char *start, int len, int *retlen)
{
    const char *cur = start;
    unsigned long value = 0;

    for (; len != 0; len--) {
        if (*cur < '0' || *cur > '7') {
            break;
        }
        value = (value << 3) | (unsigned long)(*cur - '0');
        cur++;
    }

    *retlen = (int)(cur - start);
    return value;
}
125
/*
 * Parse a hexadecimal number from the beginning of start.
 *
 * At most len characters are examined; scanning stops at the end of the
 * string or at the first character that is not in the digit table.
 * Both lower- and upper-case digits are accepted.  The table also
 * contains 'x' at index 32, so an 'x' is consumed and contributes the
 * value 0 (32 & 15).
 *
 * The number of characters consumed is stored in *retlen and the
 * accumulated value is returned.
 */
unsigned long
nmz_scan_hex(const char *start, int len, int *retlen)
{
    static const char digits[] = "0123456789abcdef0123456789ABCDEFx";
    const char *cur = start;
    unsigned long value = 0;

    for (; len != 0 && *cur != '\0'; len--, cur++) {
        const char *hit = strchr(digits, *cur);

        if (hit == NULL) {
            break;
        }
        /* Index modulo 16 is the digit value for both letter cases. */
        value = (value << 4) | (unsigned long)((hit - digits) & 15);
    }

    *retlen = (int)(cur - start);
    return value;
}
142
143
/* Running total of the byte counts requested through nmz_xmalloc(). */
static unsigned long malloc_memories = 0;

/*
 * Allocate size bytes with malloc().
 *
 * A request for 0 bytes is treated as a request for 1 byte.  The result
 * of malloc() is returned as is, so it is NULL when allocation fails.
 */
void *
nmz_xmalloc(unsigned long size)
{
    unsigned long request = (size == 0) ? 1 : size;

    malloc_memories += request;
    return malloc(request);
}
157
/*
 * Resize the memory block ptr to size bytes with realloc().
 *
 * A NULL ptr is forwarded to nmz_xmalloc().  A request for 0 bytes is
 * treated as a request for 1 byte, the same rule nmz_xmalloc() applies:
 * realloc(ptr, 0) may free the block and return NULL, which a caller
 * could not tell apart from an allocation failure.
 *
 * Returns the resized block, or NULL when allocation fails (in that
 * case the original block is still valid).
 */
void *
nmz_xrealloc(void *ptr, unsigned long size)
{
    if (ptr == NULL) {
        return nmz_xmalloc(size);
    }
    if (size == 0) {
        size = 1;
    }
    return realloc(ptr, size);
}
167
168
169
/*
 * Translate characters of str in place.
 *
 * Every character of str that occurs in lstr is replaced by the
 * character at the same position in rstr.  rstr is indexed with the
 * position found in lstr, so it has to be at least as long as lstr.
 */
void
nmz_tr(char *str, const char *lstr, const char *rstr)
{
    char *cur;

    for (cur = str; *cur != '\0'; cur++) {
        const char *hit = strchr(lstr, *cur);

        if (hit == NULL) {
            continue;   /* not in the source set: keep as is */
        }
        *cur = rstr[hit - lstr];
    }
}
181
/*
 * Delete trailing white spaces ('\n', '\r', ' ' and '\t') from str,
 * in place.
 *
 * The string is walked by length rather than by a pointer running
 * backwards, so an empty or all-whitespace string never produces a
 * pointer in front of the start of the buffer.
 */
void
nmz_chomp(char *str)
{
    size_t len = strlen(str);

    while (len > 0) {
        char c = str[len - 1];

        if (c != '\n' && c != '\r' && c != ' ' && c != '\t') {
            break;
        }
        str[--len] = '\0';
    }
}
198
199
/*
 * Do fread with endian consideration.
 *
 * Reads up to nmemb elements of size bytes each from stream into ptr
 * and returns the number of elements read, exactly like fread().  When
 * WORDS_BIGENDIAN is not defined, the bytes of every element that was
 * read are reversed afterwards.
 *
 * Only the elements actually read are reversed; after a short read the
 * rest of the buffer is left untouched.
 */
size_t
nmz_fread(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
    size_t value;

    value = fread(ptr, size, nmemb, stream);
    /*
     * FIXME: Please tell me if you know a better way.
     */
#ifndef WORDS_BIGENDIAN
    reverse_byte_order(ptr, value, size);
#endif
    return value;
}
217
/*
 * Read one variable-length integer from fp.
 *
 * Bytes are read one at a time.  A byte of 128 or more contributes its
 * low 7 bits and announces that another byte follows; the first byte
 * below 128 supplies the final 7 bits and ends the number.
 *
 * On success the value is stored in *data and the number of bytes
 * consumed is returned.  If EOF is hit first, 0 is returned and *data
 * is not written.
 */
int
nmz_get_unpackw(FILE *fp, int *data)
{
    int acc = 0;
    int nbytes = 0;

    for (;;) {
        int byte = getc(fp);

        nbytes++;
        if (byte == EOF) {
            return 0;
        }
        if (byte >= 128) {
            /* Continuation byte: merge its payload and make room. */
            acc = (acc + (byte & 0x7f)) << 7;
            continue;
        }
        *data = acc + byte;
        return nbytes;
    }
}
240
/*
 * Read variable-length integers from fp into buf until at least size
 * bytes have been consumed or a read fails.
 *
 * Every stored value consumes at least one byte, so at most size values
 * are written; buf must have room for that many ints.
 *
 * Returns the number of values stored in buf.  A value whose read
 * failed is not counted, because its slot in buf was never written.
 */
int
nmz_read_unpackw(FILE *fp, int *buf, int size)
{
    int consumed = 0;   /* bytes read so far */
    int count = 0;      /* values stored so far */

    while (consumed < size) {
        int used = nmz_get_unpackw(fp, &buf[count]);

        if (used == 0) {    /* Error */
            break;
        }
        consumed += used;
        count++;
    }
    return count;
}
256
/*
 * Read an index and return its value which is a pointer to another file.
 *
 * The entry is the int stored at position point (counted in ints from
 * the beginning of fp) and is read through nmz_fread().
 *
 * Returns the entry converted to long, or -1 when the seek or the read
 * fails.
 * NOTE(review): -1 is used as the failure value on the assumption that
 * valid entries are non-negative offsets -- confirm against callers.
 */
long
nmz_getidxptr(FILE *fp, long point)
{
    int val = -1;

    if (fseek(fp, point * (long)sizeof(int), SEEK_SET) != 0) {
        return -1;
    }
    if (nmz_fread(&val, sizeof(int), 1, fp) != 1) {
        return -1;
    }
    return (long)val;
}
269
270 /*
271 * Warning messaging function.
272 */
273 void
nmz_warn_printf(const char * fmt,...)274 nmz_warn_printf(const char *fmt, ...)
275 {
276 va_list args;
277 FILE *fp;
278
279 if (!nmz_is_loggingmode())
280 return;
281 if (nmz_is_output_warn_to_file()) {
282 fp = fopen(NMZ.warnlog, "a+");
283 if (fp == NULL) {
284 fprintf(stderr, "Can't open NMZ.warnlog.\n");
285 return;
286 }
287 } else {
288 fp = stderr;
289 }
290
291 fflush(fp);
292
293 fprintf(fp, "%s: ", PACKAGE);
294
295 va_start(args, fmt);
296 vfprintf(fp, fmt, args);
297 va_end(args);
298
299 if (fmt[strlen(fmt) - 1] != '\n') {
300 fprintf(fp, "\n");
301 }
302
303 fflush(fp);
304
305 if (fp != stderr)
306 fclose(fp);
307 }
308
309 /*
310 * Debug messaging function.
311 */
312 void
nmz_debug_printf(const char * fmt,...)313 nmz_debug_printf(const char *fmt, ...)
314 {
315 va_list args;
316
317 if (!nmz_is_debugmode()) {
318 return;
319 }
320
321 fflush(stdout);
322
323 fprintf(stderr, "%s(debug): ", PACKAGE);
324
325 va_start(args, fmt);
326 vfprintf(stderr, fmt, args);
327 va_end(args);
328
329 if (fmt[strlen(fmt) - 1] != '\n') {
330 fprintf(stderr, "\n");
331 }
332
333 fflush(stderr);
334 }
335
336 void
nmz_pathcat(const char * base,char * name)337 nmz_pathcat(const char *base, char *name)
338 {
339 char work[BUFSIZE];
340 int i;
341 int win32 = 0;
342 #if defined(_WIN32) || defined(__EMX__)
343 win32 = 1;
344 #endif
345
346 for (i = (int)strlen(name) - 1; i >= 0; i--) {
347 if (name[i] == '/' || (win32 && name[i] == '\\')) {
348 strcpy(name, name + i + 1);
349 break;
350 }
351 }
352 strncpy(work, base, BUFSIZE - 1);
353 work[BUFSIZE - 1] = '\0';
354 strncat(work, "/", BUFSIZE - strlen(work) - 1);
355 strncat(work, name, BUFSIZE - strlen(work) - 1);
356 strncpy(name, work, BUFSIZE - 1);
357 }
358
/*
 * Check whether str consists of digits only.
 *
 * Returns 1 when every character of str satisfies nmz_isdigit() and
 * str is at most 10 characters long, 0 otherwise.  An empty string
 * yields 1.
 */
int
nmz_isnumstr(const char *str)
{
    const unsigned char *cur = (const unsigned char *)str;

    if (strlen(str) > 10) {     /* Too large number */
        return 0;
    }

    while (*cur != '\0') {
        if (!nmz_isdigit((int)*cur)) {
            return 0;
        }
        cur++;
    }

    return 1;
}
376
/*
 * Substitute for tolower(3).
 *
 * Only the characters 'A'..'Z' are converted; every other value is
 * returned unchanged.
 */

int
_nmz_tolower(int c)
{
    return (c >= 'A' && c <= 'Z') ? (c - 'A' + 'a') : c;
}
390
/*
 * Substitute for toupper(3).
 *
 * Only the characters 'a'..'z' are converted; every other value is
 * returned unchanged.
 */
int
_nmz_toupper(int c)
{
    return (c >= 'a' && c <= 'z') ? (c - 'a' + 'A') : c;
}
400
/*
 * Convert str to lower case, in place.
 */
void
nmz_strlower(char *str)
{
    char *cur;

    for (cur = str; *cur != '\0'; cur++) {
        /* Using ascii dependent lower same as mknmz. */
        *cur = _nmz_tolower(*cur);
    }
}
410
/*
 * Compare str1 and str2 over the length of the shorter one, ignoring
 * case.
 *
 * Returns the result of strncasecmp(); it is 0 when one string is a
 * case-insensitive prefix of the other (an empty string is a prefix of
 * everything).
 */
int
nmz_strprefixcasecmp(const char *str1, const char *str2)
{
    size_t n = strlen(str1);
    size_t other = strlen(str2);

    if (other < n) {
        n = other;
    }
    return strncasecmp(str1, str2, n);
}
425
/*
 * Compare str1 and str2 over the length of the shorter one.
 *
 * Returns the result of strncmp(); it is 0 when one string is a prefix
 * of the other (an empty string is a prefix of everything).
 */
int
nmz_strprefixcmp(const char *str1, const char *str2)
{
    size_t n = strlen(str1);
    size_t other = strlen(str2);

    if (other < n) {
        n = other;
    }
    return strncmp(str1, str2, n);
}
440
/*
 * Compare the tails of str1 and str2.
 *
 * The longer string is cut down from the front to the length of the
 * shorter one, then both are compared with strcmp().  The result is 0
 * when one string is a suffix of the other (an empty string is a
 * suffix of everything).
 */
int
nmz_strsuffixcmp(const char *str1, const char *str2)
{
    size_t len1 = strlen(str1);
    size_t len2 = strlen(str2);
    const char *tail1 = str1;
    const char *tail2 = str2;

    if (len1 > len2) {
        tail1 += len1 - len2;
    } else {
        tail2 += len2 - len1;
    }
    return strcmp(tail1, tail2);
}
455
/*
 * Load the whole of file.
 *
 * The size of fname is taken from stat(), the file is opened in binary
 * mode and read into a freshly malloc'ed buffer that is terminated
 * with '\0' directly after the bytes actually read.  The caller owns
 * the returned buffer and has to free() it.
 *
 * Returns NULL on failure.  When the file cannot be stat'ed or opened a
 * warning is issued through nmz_warn_printf(); when the allocation or
 * the read fails the dying message is set.
 */
char *
nmz_readfile(const char *fname)
{
    char *buf;
    FILE *fp;
    struct stat fstatus;
    size_t fsize, nread;

    errno = 0; /* errno must be initialized. */

    /* Without a successful stat() the size below would be garbage. */
    if (stat(fname, &fstatus) != 0) {
        nmz_warn_printf("%s: %s", fname, strerror(errno));
        return NULL;
    }
    fp = fopen(fname, "rb");
    if (fp == NULL) {
        nmz_warn_printf("%s: %s", fname, strerror(errno));
        return NULL;
    }
    fsize = (size_t)fstatus.st_size;
    buf = malloc(fsize + 1);
    if (buf == NULL) {
        nmz_set_dyingmsg(nmz_msg("%s: %s", fname, strerror(errno)));
        fclose(fp);
        return NULL;
    }
    nread = fread(buf, sizeof(char), fsize, fp);
    if (fsize != 0 && nread == 0) {
        nmz_set_dyingmsg(nmz_msg("%s: %s", fname, strerror(errno)));
        free(buf);
        fclose(fp);
        return NULL;
    }
    /*
     * Terminate after what was really read, so that a short read never
     * exposes uninitialized bytes of the buffer.
     */
    buf[nread] = '\0';
    fclose(fp);
    return buf;
}
492
/*
 * Safe version of getenv.
 *
 * Returns the value of the environment variable s, or an empty string
 * instead of NULL when getenv() finds nothing.
 */
char *
nmz_getenv(const char *s)
{
    char *value = getenv(s);

    if (value == NULL) {
        return "";
    }
    return value;
}
502
/*
 * Decoding URI encoded strings
 *
 * str is decoded in place: "%XY" with two hexadecimal digits becomes
 * the byte with that value, '+' becomes a space and every other
 * character is copied unchanged.  A '%' that is not followed by two
 * hexadecimal digits is kept literally.
 *
 * The result is never longer than the input.
 */
void
nmz_decode_uri(char *str)
{
    size_t i, j;

    for (i = j = 0; str[i]; i++, j++) {
        /*
         * isxdigit() is false for '\0', so the second test is only
         * reached while str[i + 1] is still inside the string.
         */
        if (str[i] == '%'
            && isxdigit((unsigned char)str[i + 1])
            && isxdigit((unsigned char)str[i + 2]))
        {
            str[j] = decode_uri_sub(str[i + 1], str[i + 2]);
            i += 2;
        } else if (str[i] == '+') {
            str[j] = ' ';
        } else {
            /* Ordinary character or malformed escape: copy as is. */
            str[j] = str[i];
        }
    }
    str[j] = '\0';
}
524
525 /*
526 * Returns a string describing the libnmz error code passed
527 * in the argument errnum just like strerror().
528 */
529 char *
nmz_strerror(enum nmz_stat errnum)530 nmz_strerror(enum nmz_stat errnum)
531 {
532 char *msg = NULL;
533
534 switch (errnum) {
535 case ERR_FATAL:
536 msg = _("Fatal error occurred!");
537 break;
538 case ERR_TOO_LONG_QUERY:
539 msg = _("Too long query");
540 break;
541 case ERR_INVALID_QUERY:
542 msg = _("Invalid query");
543 break;
544 case ERR_TOO_MANY_TOKENS:
545 msg = _("Too many query tokens");
546 break;
547 case ERR_TOO_MUCH_MATCH:
548 msg = _("Too many words matched. Ignored");
549 break;
550 case ERR_TOO_MUCH_HIT:
551 msg = _("Too many documents hit. Ignored");
552 break;
553 case ERR_REGEX_SEARCH_FAILED:
554 msg = _("can't open the regex index");
555 break;
556 case ERR_PHRASE_SEARCH_FAILED:
557 msg = _("can't open the phrase index");
558 break;
559 case ERR_FIELD_SEARCH_FAILED:
560 msg = _("can't open the field index");
561 break;
562 case ERR_CANNOT_OPEN_INDEX:
563 msg = _("can't open the index");
564 break;
565 case ERR_CANNOT_OPEN_RESULT_FORMAT_FILE:
566 msg = _("can't open the result format file");
567 break;
568 case ERR_NO_PERMISSION:
569 msg = _("You don't have a permission to access the index");
570 break;
571 case ERR_INDEX_IS_LOCKED:
572 msg = _("The index is locked for maintenance");
573 break;
574 case ERR_OLD_INDEX_FORMAT:
575 msg = _("Present index is old type. it's unsupported.");
576 break;
577 default:
578 msg = _("Unknown error. Report bug!");
579 break;
580 }
581
582 assert(msg != NULL);
583
584 return msg;
585 }
586
/*
 * Check whether the file fname exists or not.
 *
 * Returns 1 when stat() succeeds on fname and 0 otherwise.
 */
int
nmz_is_file_exists(const char *fname)
{
    struct stat st;

    if (stat(fname, &st) != 0) {
        return 0;
    }
    return 1;
}
597
/*
 * for directory traversal issue.
 * Must be encoded in EUC-JP encoding.
 *
 * Copies at most n - 1 characters of src to dest, terminates dest and
 * then cuts it at the first '/' or '\\', so that only the text in
 * front of the first path delimiter remains.
 *
 * Returns dest.  When n is 0, dest is returned untouched.
 */
char *
nmz_delete_since_path_delimitation(char *dest, const char *src, size_t n)
{
    char *cur;

    if (n < 1) {
        return dest;
    }

    strncpy(dest, src, n - 1);
    dest[n - 1] = '\0';

    for (cur = dest; *cur != '\0'; cur++) {
        if (*cur == '/' || *cur == '\\') {
            *cur = '\0';
            break;
        }
    }

    return dest;
}
625