1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 #include <config.h>
39 
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_SYS_STAT_H
47 #include <sys/stat.h>
48 #endif
49 #ifdef HAVE_SYS_TYPES_H
50 #include <sys/types.h>
51 #endif
52 #include <assert.h>
53 
54 #include "sphinxbase/pio.h"
55 #include "sphinxbase/filename.h"
56 #include "sphinxbase/err.h"
57 #include "sphinxbase/strfuncs.h"
58 #include "sphinxbase/ckd_alloc.h"
59 
60 #ifndef EXEEXT
61 #define EXEEXT ""
62 #endif
63 
/* Compression schemes that guess_comptype() derives from a file name suffix. */
enum {
    COMP_NONE = 0,      /* no recognised compression suffix */
    COMP_COMPRESS = 1,  /* ".Z" or ".z" */
    COMP_GZIP = 2,      /* ".gz" or ".GZ" */
    COMP_BZIP2 = 3      /* ".bz2" or ".BZ2" */
};
70 
71 static void
guess_comptype(char const * file,int32 * ispipe,int32 * isgz)72 guess_comptype(char const *file, int32 *ispipe, int32 *isgz)
73 {
74     int k;
75 
76     k = strlen(file);
77     *ispipe = 0;
78     *isgz = COMP_NONE;
79     if ((k > 2)
80         && ((strcmp(file + k - 2, ".Z") == 0)
81             || (strcmp(file + k - 2, ".z") == 0))) {
82         *ispipe = 1;
83         *isgz = COMP_COMPRESS;
84     }
85     else if ((k > 3) && ((strcmp(file + k - 3, ".gz") == 0)
86                         || (strcmp(file + k - 3, ".GZ") == 0))) {
87         *ispipe = 1;
88         *isgz = COMP_GZIP;
89     }
90     else if ((k > 4) && ((strcmp(file + k - 4, ".bz2") == 0)
91                         || (strcmp(file + k - 4, ".BZ2") == 0))) {
92         *ispipe = 1;
93         *isgz = COMP_BZIP2;
94     }
95 }
96 
97 FILE *
fopen_comp(const char * file,const char * mode,int32 * ispipe)98 fopen_comp(const char *file, const char *mode, int32 * ispipe)
99 {
100     FILE *fp;
101 
102 #ifndef HAVE_POPEN
103     *ispipe = 0; /* No popen() on WinCE */
104 #else /* HAVE_POPEN */
105     int32 isgz;
106     guess_comptype(file, ispipe, &isgz);
107 #endif /* HAVE_POPEN */
108 
109     if (*ispipe) {
110 #ifndef HAVE_POPEN
111         /* Shouldn't get here, anyway */
112         E_FATAL("No popen() on WinCE\n");
113 #else
114         if (strcmp(mode, "r") == 0) {
115             char *command;
116             switch (isgz) {
117             case COMP_GZIP:
118                 command = string_join("gunzip" EXEEXT, " -c ", file, NULL);
119                 break;
120             case COMP_COMPRESS:
121                 command = string_join("zcat" EXEEXT, " ", file, NULL);
122                 break;
123             case COMP_BZIP2:
124                 command = string_join("bunzip2" EXEEXT, " -c ", file, NULL);
125                 break;
126             default:
127                 command = NULL; /* Make compiler happy. */
128                 E_FATAL("Unknown  compression type %d\n", isgz);
129             }
130             if ((fp = popen(command, mode)) == NULL) {
131                 E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode);
132                 ckd_free(command);
133                 return NULL;
134             }
135             ckd_free(command);
136         }
137         else if (strcmp(mode, "w") == 0) {
138             char *command;
139             switch (isgz) {
140             case COMP_GZIP:
141                 command = string_join("gzip" EXEEXT, " > ", file, NULL);
142                 break;
143             case COMP_COMPRESS:
144                 command = string_join("compress" EXEEXT, " -c > ", file, NULL);
145                 break;
146             case COMP_BZIP2:
147                 command = string_join("bzip2" EXEEXT, " > ", file, NULL);
148                 break;
149             default:
150                 command = NULL; /* Make compiler happy. */
151                 E_FATAL("Unknown compression type %d\n", isgz);
152             }
153             if ((fp = popen(command, mode)) == NULL) {
154                 E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode);
155                 ckd_free(command);
156                 return NULL;
157             }
158             ckd_free(command);
159         }
160         else {
161             E_ERROR("Compressed file operation for mode %s is not supported", mode);
162             return NULL;
163         }
164 #endif /* HAVE_POPEN */
165     }
166     else {
167         fp = fopen(file, mode);
168     }
169 
170     return (fp);
171 }
172 
173 
174 void
fclose_comp(FILE * fp,int32 ispipe)175 fclose_comp(FILE * fp, int32 ispipe)
176 {
177     if (ispipe) {
178 #ifdef HAVE_POPEN
179 #if defined(_WIN32) && (!defined(__SYMBIAN32__))
180         _pclose(fp);
181 #else
182         pclose(fp);
183 #endif
184 #endif
185     }
186     else
187         fclose(fp);
188 }
189 
190 
191 FILE *
fopen_compchk(const char * file,int32 * ispipe)192 fopen_compchk(const char *file, int32 * ispipe)
193 {
194 #ifndef HAVE_POPEN
195     *ispipe = 0; /* No popen() on WinCE */
196     /* And therefore the rest of this function is useless. */
197     return (fopen_comp(file, "r", ispipe));
198 #else /* HAVE_POPEN */
199     int32 isgz;
200     FILE *fh;
201 
202     /* First just try to fopen_comp() it */
203     if ((fh = fopen_comp(file, "r", ispipe)) != NULL)
204         return fh;
205     else {
206         char *tmpfile;
207         int k;
208 
209         /* File doesn't exist; try other compressed/uncompressed form, as appropriate */
210         guess_comptype(file, ispipe, &isgz);
211         k = strlen(file);
212         tmpfile = ckd_calloc(k+5, 1);
213         strcpy(tmpfile, file);
214         switch (isgz) {
215         case COMP_GZIP:
216             tmpfile[k - 3] = '\0';
217             break;
218         case COMP_BZIP2:
219             tmpfile[k - 4] = '\0';
220             break;
221         case COMP_COMPRESS:
222             tmpfile[k - 2] = '\0';
223             break;
224         case COMP_NONE:
225             strcpy(tmpfile + k, ".gz");
226             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
227                 E_WARN("Using %s instead of %s\n", tmpfile, file);
228                 ckd_free(tmpfile);
229                 return fh;
230             }
231             strcpy(tmpfile + k, ".bz2");
232             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
233                 E_WARN("Using %s instead of %s\n", tmpfile, file);
234                 ckd_free(tmpfile);
235                 return fh;
236             }
237             strcpy(tmpfile + k, ".Z");
238             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
239                 E_WARN("Using %s instead of %s\n", tmpfile, file);
240                 ckd_free(tmpfile);
241                 return fh;
242             }
243             ckd_free(tmpfile);
244             return NULL;
245         }
246         E_WARN("Using %s instead of %s\n", tmpfile, file);
247         fh = fopen_comp(tmpfile, "r", ispipe);
248         ckd_free(tmpfile);
249         return NULL;
250     }
251 #endif /* HAVE_POPEN */
252 }
253 
254 lineiter_t *
lineiter_start(FILE * fh)255 lineiter_start(FILE *fh)
256 {
257     lineiter_t *li;
258 
259     li = ckd_calloc(1, sizeof(*li));
260     li->buf = ckd_malloc(128);
261     li->buf[0] = '\0';
262     li->bsiz = 128;
263     li->len = 0;
264     li->fh = fh;
265 
266     li = lineiter_next(li);
267 
268     /* Strip the UTF-8 BOM */
269 
270     if (li && 0 == strncmp(li->buf, "\xef\xbb\xbf", 3)) {
271 	memmove(li->buf, li->buf + 3, strlen(li->buf + 1));
272 	li->len -= 3;
273     }
274 
275     return li;
276 }
277 
278 lineiter_t *
lineiter_start_clean(FILE * fh)279 lineiter_start_clean(FILE *fh)
280 {
281     lineiter_t *li;
282 
283     li = lineiter_start(fh);
284 
285     if (li == NULL)
286 	return li;
287 
288     li->clean = TRUE;
289 
290     if (li->buf && li->buf[0] == '#') {
291 	li = lineiter_next(li);
292     } else {
293 	string_trim(li->buf, STRING_BOTH);
294     }
295 
296     return li;
297 }
298 
299 
300 static lineiter_t *
lineiter_next_plain(lineiter_t * li)301 lineiter_next_plain(lineiter_t *li)
302 {
303     /* We are reading the next line */
304     li->lineno++;
305 
306     /* Read a line and check for EOF. */
307     if (fgets(li->buf, li->bsiz, li->fh) == NULL) {
308         lineiter_free(li);
309         return NULL;
310     }
311     /* If we managed to read the whole thing, then we are done
312      * (this will be by far the most common result). */
313     li->len = strlen(li->buf);
314     if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
315         return li;
316 
317     /* Otherwise we have to reallocate and keep going. */
318     while (1) {
319         li->bsiz *= 2;
320         li->buf = ckd_realloc(li->buf, li->bsiz);
321         /* If we get an EOF, we are obviously done. */
322         if (fgets(li->buf + li->len, li->bsiz - li->len, li->fh) == NULL) {
323             li->len += strlen(li->buf + li->len);
324             return li;
325         }
326         li->len += strlen(li->buf + li->len);
327         /* If we managed to read the whole thing, then we are done. */
328         if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
329             return li;
330     }
331 
332     /* Shouldn't get here. */
333     return li;
334 }
335 
336 
337 lineiter_t *
lineiter_next(lineiter_t * li)338 lineiter_next(lineiter_t *li)
339 {
340     if (!li->clean)
341 	return lineiter_next_plain(li);
342 
343     for (li = lineiter_next_plain(li); li; li = lineiter_next_plain(li)) {
344 	if (li->buf && li->buf[0] != '#') {
345 	    li->buf = string_trim(li->buf, STRING_BOTH);
346 	    break;
347 	}
348     }
349     return li;
350 }
351 
/*
 * Return the iterator's line counter, which lineiter_next_plain()
 * increments once for every physical line it attempts to read.
 */
int
lineiter_lineno(lineiter_t *li)
{
    int current = li->lineno;

    return current;
}
356 
357 void
lineiter_free(lineiter_t * li)358 lineiter_free(lineiter_t *li)
359 {
360     if (li == NULL)
361         return;
362     ckd_free(li->buf);
363     ckd_free(li);
364 }
365 
/*
 * Read one line of arbitrary length from `stream` into a newly allocated,
 * NUL-terminated string.  A trailing newline, if read, is kept.
 *
 * stream   stream to read from.
 * out_len  if non-NULL, receives the length of the returned string
 *          (0 when nothing was read).
 *
 * Returns the line, which the caller releases with ckd_free(), or NULL if
 * nothing could be read.
 */
char *
fread_line(FILE *stream, size_t *out_len)
{
    char chunk[128];
    char *line = NULL;
    size_t used = 0;

    while (fgets(chunk, sizeof(chunk), stream) != NULL) {
        size_t got = strlen(chunk);

        /* Make room for this chunk plus its terminator, then append it. */
        if (line == NULL)
            line = ckd_malloc(got + 1);
        else
            line = ckd_realloc(line, used + got + 1);
        memcpy(line + used, chunk, got + 1);
        used += got;

        /* Stop on a short read or end of line. */
        if (got < sizeof(chunk) - 1 || chunk[got - 1] == '\n')
            break;
    }

    if (out_len != NULL)
        *out_len = used;
    return line;
}
394 
395 #define FREAD_RETRY_COUNT	60
396 
397 int32
fread_retry(void * pointer,int32 size,int32 num_items,FILE * stream)398 fread_retry(void *pointer, int32 size, int32 num_items, FILE * stream)
399 {
400     char *data;
401     uint32 n_items_read;
402     uint32 n_items_rem;
403     uint32 n_retry_rem;
404     int32 loc;
405 
406     n_retry_rem = FREAD_RETRY_COUNT;
407 
408     data = pointer;
409     loc = 0;
410     n_items_rem = num_items;
411 
412     do {
413         n_items_read = fread(&data[loc], size, n_items_rem, stream);
414 
415         n_items_rem -= n_items_read;
416 
417         if (n_items_rem > 0) {
418             /* an incomplete read occurred */
419 
420             if (n_retry_rem == 0)
421                 return -1;
422 
423             if (n_retry_rem == FREAD_RETRY_COUNT) {
424                 E_ERROR_SYSTEM("fread() failed; retrying...\n");
425             }
426 
427             --n_retry_rem;
428 
429             loc += n_items_read * size;
430 #ifdef HAVE_UNISTD_H
431             sleep(1);
432 #endif
433         }
434     } while (n_items_rem > 0);
435 
436     return num_items;
437 }
438 
439 
440 /* Silvio Moioli: updated to use Unicode */
441 #ifdef _WIN32_WCE /* No stat() on WinCE */
442 int32
stat_retry(const char * file,struct stat * statbuf)443 stat_retry(const char *file, struct stat * statbuf)
444 {
445     WIN32_FIND_DATAW file_data;
446     HANDLE *h;
447     wchar_t *wfile;
448     size_t len;
449 
450     len = mbstowcs(NULL, file, 0) + 1;
451     wfile = ckd_calloc(len, sizeof(*wfile));
452     mbstowcs(wfile, file, len);
453     if ((h = FindFirstFileW(wfile, &file_data)) == INVALID_HANDLE_VALUE) {
454         ckd_free(wfile);
455         return -1;
456     }
457     ckd_free(wfile);
458     memset(statbuf, 0, sizeof(statbuf));
459     statbuf->st_mtime = file_data.ftLastWriteTime.dwLowDateTime;
460     statbuf->st_size = file_data.nFileSizeLow;
461     FindClose(h);
462 
463     return 0;
464 }
465 
466 
467 int32
stat_mtime(const char * file)468 stat_mtime(const char *file)
469 {
470     struct stat statbuf;
471 
472     if (stat_retry(file, &statbuf) != 0)
473         return -1;
474 
475     return ((int32) statbuf.st_mtime);
476 }
477 #else
478 #define STAT_RETRY_COUNT	10
479 int32
stat_retry(const char * file,struct stat * statbuf)480 stat_retry(const char *file, struct stat * statbuf)
481 {
482     int32 i;
483 
484 
485 
486     for (i = 0; i < STAT_RETRY_COUNT; i++) {
487 
488 #ifndef HAVE_SYS_STAT_H
489 		FILE *fp;
490 
491 		if ((fp=(FILE *)fopen(file, "r"))!= 0)
492 		{
493 		    fseek( fp, 0, SEEK_END);
494 		    statbuf->st_size = ftell( fp );
495 		    fclose(fp);
496 		    return 0;
497 		}
498 
499 #else /* HAVE_SYS_STAT_H */
500         if (stat(file, statbuf) == 0)
501             return 0;
502 #endif
503         if (i == 0) {
504             E_ERROR_SYSTEM("Failed to stat file '%s'; retrying...", file);
505         }
506 #ifdef HAVE_UNISTD_H
507         sleep(1);
508 #endif
509     }
510 
511     return -1;
512 }
513 
514 int32
stat_mtime(const char * file)515 stat_mtime(const char *file)
516 {
517     struct stat statbuf;
518 
519 #ifdef HAVE_SYS_STAT_H
520     if (stat(file, &statbuf) != 0)
521         return -1;
522 #else /* HAVE_SYS_STAT_H */
523     if (stat_retry(file, &statbuf) != 0)
524         return -1;
525 #endif /* HAVE_SYS_STAT_H */
526 
527     return ((int32) statbuf.st_mtime);
528 }
529 #endif /* !_WIN32_WCE */
530 
531 struct bit_encode_s {
532     FILE *fh;
533     unsigned char buf, bbits;
534     int16 refcount;
535 };
536 
537 bit_encode_t *
bit_encode_attach(FILE * outfh)538 bit_encode_attach(FILE *outfh)
539 {
540     bit_encode_t *be;
541 
542     be = ckd_calloc(1, sizeof(*be));
543     be->refcount = 1;
544     be->fh = outfh;
545     return be;
546 }
547 
548 bit_encode_t *
bit_encode_retain(bit_encode_t * be)549 bit_encode_retain(bit_encode_t *be)
550 {
551     ++be->refcount;
552     return be;
553 }
554 
555 int
bit_encode_free(bit_encode_t * be)556 bit_encode_free(bit_encode_t *be)
557 {
558     if (be == NULL)
559         return 0;
560     if (--be->refcount > 0)
561         return be->refcount;
562     ckd_free(be);
563 
564     return 0;
565 }
566 
567 int
bit_encode_write(bit_encode_t * be,unsigned char const * bits,int nbits)568 bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits)
569 {
570     int tbits;
571 
572     tbits = nbits + be->bbits;
573     if (tbits < 8)  {
574         /* Append to buffer. */
575         be->buf |= ((bits[0] >> (8 - nbits)) << (8 - tbits));
576     }
577     else {
578         int i = 0;
579         while (tbits >= 8) {
580             /* Shift bits out of the buffer and splice with high-order bits */
581             fputc(be->buf | ((bits[i]) >> be->bbits), be->fh);
582             /* Put low-order bits back into buffer */
583             be->buf = (bits[i] << (8 - be->bbits)) & 0xff;
584             tbits -= 8;
585             ++i;
586         }
587     }
588     /* tbits contains remaining number of  bits. */
589     be->bbits = tbits;
590 
591     return nbits;
592 }
593 
594 int
bit_encode_write_cw(bit_encode_t * be,uint32 codeword,int nbits)595 bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits)
596 {
597     unsigned char bits[4];
598     codeword <<= (32 - nbits);
599     bits[0] = (codeword >> 24) & 0xff;
600     bits[1] = (codeword >> 16) & 0xff;
601     bits[2] = (codeword >> 8) & 0xff;
602     bits[3] = codeword & 0xff;
603     return bit_encode_write(be, bits, nbits);
604 }
605 
606 int
bit_encode_flush(bit_encode_t * be)607 bit_encode_flush(bit_encode_t *be)
608 {
609     if (be->bbits) {
610         fputc(be->buf, be->fh);
611         be->bbits = 0;
612     }
613     return 0;
614 }
615 
616 #if defined(HAVE_SYS_STAT_H) && !defined(__MINGW32__) /* Unix, Cygwin, doesn't work on MINGW */
/*
 * Create directory `path`, creating missing parent directories first.
 *
 * Returns 0 if the directory was created or already exists, and -1 on
 * failure (empty path, an mkdir() error other than ENOENT, or failure of
 * the final mkdir() after the parents were built).
 */
int
build_directory(const char *path)
{
    char *dirname;
    int rv;

    /* Utterly failed... */
    if (strlen(path) == 0)
        return -1;
    /* Utterly succeeded... */
    if (mkdir(path, 0777) == 0)
        return 0;
    /* Or, it already exists... */
    if (errno == EEXIST)
        return 0;
    if (errno != ENOENT) {
        E_ERROR_SYSTEM("Failed to create %s", path);
        return -1;
    }

    /* A parent is missing: build it, then try this directory again. */
    dirname = ckd_salloc(path);
    path2dirname(path, dirname);
    if (strcmp(dirname, path) == 0) {
        /* No shorter parent to fall back on; recursing would never end. */
        ckd_free(dirname);
        return -1;
    }
    build_directory(dirname);
    ckd_free(dirname);

    rv = mkdir(path, 0777);
    return rv;
}
643 #elif defined(_WIN32)
644 /* FIXME: Implement this. */
/*
 * Placeholder for platforms taking the _WIN32 branch: logs an error and
 * fails without touching the file system.
 *
 * Always returns -1.
 */
int
build_directory(const char *path)
{
    (void) path; /* intentionally unused */

    E_ERROR("build_directory() unimplemented on your platform!\n");
    return -1;
}
651 #else
/*
 * Placeholder for platforms with neither a usable <sys/stat.h> nor _WIN32:
 * logs an error and fails without touching the file system.
 *
 * Always returns -1.
 */
int
build_directory(const char *path)
{
    (void) path; /* intentionally unused */

    E_ERROR("build_directory() unimplemented on your platform!\n");
    return -1;
}
658 #endif
659