1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41 
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <assert.h>
46 
47 #ifdef HAVE_UNISTD_H
48 #include <unistd.h>
49 #endif
50 
51 #ifdef HAVE_SYS_TYPES_H
52 #include <sys/types.h>
53 #endif
54 
55 #ifdef HAVE_SYS_STAT_H
56 #include <sys/stat.h>
57 #endif
58 
59 #if defined(_WIN32) && !defined(CYGWIN)
60 #include <direct.h>
61 #endif
62 
63 #include "sphinxbase/pio.h"
64 #include "sphinxbase/filename.h"
65 #include "sphinxbase/err.h"
66 #include "sphinxbase/strfuncs.h"
67 #include "sphinxbase/ckd_alloc.h"
68 
69 #ifndef EXEEXT
70 #define EXEEXT ""
71 #endif
72 
/* Compression schemes that guess_comptype() derives from a file name suffix. */
enum {
    COMP_NONE = 0,      /* no recognised compression suffix */
    COMP_COMPRESS = 1,  /* ".Z" or ".z" */
    COMP_GZIP = 2,      /* ".gz" or ".GZ" */
    COMP_BZIP2 = 3      /* ".bz2" or ".BZ2" */
};
79 
80 static void
guess_comptype(char const * file,int32 * ispipe,int32 * isgz)81 guess_comptype(char const *file, int32 *ispipe, int32 *isgz)
82 {
83     size_t k;
84 
85     k = strlen(file);
86     *ispipe = 0;
87     *isgz = COMP_NONE;
88     if ((k > 2)
89         && ((strcmp(file + k - 2, ".Z") == 0)
90             || (strcmp(file + k - 2, ".z") == 0))) {
91         *ispipe = 1;
92         *isgz = COMP_COMPRESS;
93     }
94     else if ((k > 3) && ((strcmp(file + k - 3, ".gz") == 0)
95                         || (strcmp(file + k - 3, ".GZ") == 0))) {
96         *ispipe = 1;
97         *isgz = COMP_GZIP;
98     }
99     else if ((k > 4) && ((strcmp(file + k - 4, ".bz2") == 0)
100                         || (strcmp(file + k - 4, ".BZ2") == 0))) {
101         *ispipe = 1;
102         *isgz = COMP_BZIP2;
103     }
104 }
105 
106 FILE *
fopen_comp(const char * file,const char * mode,int32 * ispipe)107 fopen_comp(const char *file, const char *mode, int32 * ispipe)
108 {
109     FILE *fp;
110 
111 #ifndef HAVE_POPEN
112     *ispipe = 0; /* No popen() on WinCE */
113 #else /* HAVE_POPEN */
114     int32 isgz;
115     guess_comptype(file, ispipe, &isgz);
116 #endif /* HAVE_POPEN */
117 
118     if (*ispipe) {
119 #ifndef HAVE_POPEN
120         /* Shouldn't get here, anyway */
121         E_FATAL("No popen() on WinCE\n");
122 #else
123         if (strcmp(mode, "r") == 0) {
124             char *command;
125             switch (isgz) {
126             case COMP_GZIP:
127                 command = string_join("gunzip" EXEEXT, " -c ", file, NULL);
128                 break;
129             case COMP_COMPRESS:
130                 command = string_join("zcat" EXEEXT, " ", file, NULL);
131                 break;
132             case COMP_BZIP2:
133                 command = string_join("bunzip2" EXEEXT, " -c ", file, NULL);
134                 break;
135             default:
136                 command = NULL; /* Make compiler happy. */
137                 E_FATAL("Unknown  compression type %d\n", isgz);
138             }
139             if ((fp = popen(command, mode)) == NULL) {
140                 E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode);
141                 ckd_free(command);
142                 return NULL;
143             }
144             ckd_free(command);
145         }
146         else if (strcmp(mode, "w") == 0) {
147             char *command;
148             switch (isgz) {
149             case COMP_GZIP:
150                 command = string_join("gzip" EXEEXT, " > ", file, NULL);
151                 break;
152             case COMP_COMPRESS:
153                 command = string_join("compress" EXEEXT, " -c > ", file, NULL);
154                 break;
155             case COMP_BZIP2:
156                 command = string_join("bzip2" EXEEXT, " > ", file, NULL);
157                 break;
158             default:
159                 command = NULL; /* Make compiler happy. */
160                 E_FATAL("Unknown compression type %d\n", isgz);
161             }
162             if ((fp = popen(command, mode)) == NULL) {
163                 E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode);
164                 ckd_free(command);
165                 return NULL;
166             }
167             ckd_free(command);
168         }
169         else {
170             E_ERROR("Compressed file operation for mode %s is not supported", mode);
171             return NULL;
172         }
173 #endif /* HAVE_POPEN */
174     }
175     else {
176         fp = fopen(file, mode);
177     }
178 
179     return (fp);
180 }
181 
182 
183 void
fclose_comp(FILE * fp,int32 ispipe)184 fclose_comp(FILE * fp, int32 ispipe)
185 {
186     if (ispipe) {
187 #ifdef HAVE_POPEN
188 #if defined(_WIN32) && (!defined(__SYMBIAN32__))
189         _pclose(fp);
190 #else
191         pclose(fp);
192 #endif
193 #endif
194     }
195     else
196         fclose(fp);
197 }
198 
199 
200 FILE *
fopen_compchk(const char * file,int32 * ispipe)201 fopen_compchk(const char *file, int32 * ispipe)
202 {
203 #ifndef HAVE_POPEN
204     *ispipe = 0; /* No popen() on WinCE */
205     /* And therefore the rest of this function is useless. */
206     return (fopen_comp(file, "r", ispipe));
207 #else /* HAVE_POPEN */
208     int32 isgz;
209     FILE *fh;
210 
211     /* First just try to fopen_comp() it */
212     if ((fh = fopen_comp(file, "r", ispipe)) != NULL)
213         return fh;
214     else {
215         char *tmpfile;
216         size_t k;
217 
218         /* File doesn't exist; try other compressed/uncompressed form, as appropriate */
219         guess_comptype(file, ispipe, &isgz);
220         k = strlen(file);
221         tmpfile = ckd_calloc(k+5, 1);
222         strcpy(tmpfile, file);
223         switch (isgz) {
224         case COMP_GZIP:
225             tmpfile[k - 3] = '\0';
226             break;
227         case COMP_BZIP2:
228             tmpfile[k - 4] = '\0';
229             break;
230         case COMP_COMPRESS:
231             tmpfile[k - 2] = '\0';
232             break;
233         case COMP_NONE:
234             strcpy(tmpfile + k, ".gz");
235             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
236                 E_WARN("Using %s instead of %s\n", tmpfile, file);
237                 ckd_free(tmpfile);
238                 return fh;
239             }
240             strcpy(tmpfile + k, ".bz2");
241             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
242                 E_WARN("Using %s instead of %s\n", tmpfile, file);
243                 ckd_free(tmpfile);
244                 return fh;
245             }
246             strcpy(tmpfile + k, ".Z");
247             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
248                 E_WARN("Using %s instead of %s\n", tmpfile, file);
249                 ckd_free(tmpfile);
250                 return fh;
251             }
252             ckd_free(tmpfile);
253             return NULL;
254         }
255         E_WARN("Using %s instead of %s\n", tmpfile, file);
256         fh = fopen_comp(tmpfile, "r", ispipe);
257         ckd_free(tmpfile);
258         return NULL;
259     }
260 #endif /* HAVE_POPEN */
261 }
262 
263 lineiter_t *
lineiter_start(FILE * fh)264 lineiter_start(FILE *fh)
265 {
266     lineiter_t *li;
267 
268     li = (lineiter_t *)ckd_calloc(1, sizeof(*li));
269     li->buf = (char *)ckd_malloc(128);
270     li->buf[0] = '\0';
271     li->bsiz = 128;
272     li->len = 0;
273     li->fh = fh;
274 
275     li = lineiter_next(li);
276 
277     /* Strip the UTF-8 BOM */
278 
279     if (li && 0 == strncmp(li->buf, "\xef\xbb\xbf", 3)) {
280 	memmove(li->buf, li->buf + 3, strlen(li->buf + 1));
281 	li->len -= 3;
282     }
283 
284     return li;
285 }
286 
287 lineiter_t *
lineiter_start_clean(FILE * fh)288 lineiter_start_clean(FILE *fh)
289 {
290     lineiter_t *li;
291 
292     li = lineiter_start(fh);
293 
294     if (li == NULL)
295 	return li;
296 
297     li->clean = TRUE;
298 
299     if (li->buf && li->buf[0] == '#') {
300 	li = lineiter_next(li);
301     } else {
302 	string_trim(li->buf, STRING_BOTH);
303     }
304 
305     return li;
306 }
307 
308 
309 static lineiter_t *
lineiter_next_plain(lineiter_t * li)310 lineiter_next_plain(lineiter_t *li)
311 {
312     /* We are reading the next line */
313     li->lineno++;
314 
315     /* Read a line and check for EOF. */
316     if (fgets(li->buf, li->bsiz, li->fh) == NULL) {
317         lineiter_free(li);
318         return NULL;
319     }
320     /* If we managed to read the whole thing, then we are done
321      * (this will be by far the most common result). */
322     li->len = (int32)strlen(li->buf);
323     if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
324         return li;
325 
326     /* Otherwise we have to reallocate and keep going. */
327     while (1) {
328         li->bsiz *= 2;
329         li->buf = (char *)ckd_realloc(li->buf, li->bsiz);
330         /* If we get an EOF, we are obviously done. */
331         if (fgets(li->buf + li->len, li->bsiz - li->len, li->fh) == NULL) {
332             li->len += strlen(li->buf + li->len);
333             return li;
334         }
335         li->len += strlen(li->buf + li->len);
336         /* If we managed to read the whole thing, then we are done. */
337         if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
338             return li;
339     }
340 
341     /* Shouldn't get here. */
342     return li;
343 }
344 
345 
346 lineiter_t *
lineiter_next(lineiter_t * li)347 lineiter_next(lineiter_t *li)
348 {
349     if (!li->clean)
350 	return lineiter_next_plain(li);
351 
352     for (li = lineiter_next_plain(li); li; li = lineiter_next_plain(li)) {
353 	if (li->buf && li->buf[0] != '#') {
354 	    li->buf = string_trim(li->buf, STRING_BOTH);
355 	    break;
356 	}
357     }
358     return li;
359 }
360 
/* Report the line counter stored in the iterator. */
int
lineiter_lineno(lineiter_t *li)
{
    return li->lineno;
}
365 
366 void
lineiter_free(lineiter_t * li)367 lineiter_free(lineiter_t *li)
368 {
369     if (li == NULL)
370         return;
371     ckd_free(li->buf);
372     ckd_free(li);
373 }
374 
/*
 * Read one line of arbitrary length from `stream`.
 *
 * stream   stream to read from.
 * out_len  if non-NULL, receives the length of the returned string
 *          (0 when nothing was read).
 *
 * Returns a newly allocated, NUL-terminated string holding the line
 * (including its newline, if one was read), or NULL when no data could
 * be read.  The caller owns the result and releases it with ckd_free().
 */
char *
fread_line(FILE *stream, size_t *out_len)
{
    char chunk[128];
    char *line = NULL;
    size_t total = 0;

    while (fgets(chunk, sizeof(chunk), stream) != NULL) {
        size_t const n = strlen(chunk);

        /* Make room for this chunk and its terminator, then append it. */
        if (line == NULL)
            line = (char *)ckd_malloc(n + 1);
        else
            line = (char *)ckd_realloc(line, total + n + 1);
        memcpy(line + total, chunk, n + 1);
        total += n;

        /* Stop on a short read or end of line. */
        if (n < sizeof(chunk) - 1 || chunk[n - 1] == '\n')
            break;
    }

    if (out_len != NULL)
        *out_len = total;
    return line;
}
403 
404 #define FREAD_RETRY_COUNT	60
405 
406 int32
fread_retry(void * pointer,int32 size,int32 num_items,FILE * stream)407 fread_retry(void *pointer, int32 size, int32 num_items, FILE * stream)
408 {
409     char *data;
410     size_t n_items_read;
411     size_t n_items_rem;
412     uint32 n_retry_rem;
413     int32 loc;
414 
415     n_retry_rem = FREAD_RETRY_COUNT;
416 
417     data = (char *)pointer;
418     loc = 0;
419     n_items_rem = num_items;
420 
421     do {
422         n_items_read = fread(&data[loc], size, n_items_rem, stream);
423 
424         n_items_rem -= n_items_read;
425 
426         if (n_items_rem > 0) {
427             /* an incomplete read occurred */
428 
429             if (n_retry_rem == 0)
430                 return -1;
431 
432             if (n_retry_rem == FREAD_RETRY_COUNT) {
433                 E_ERROR_SYSTEM("fread() failed; retrying...\n");
434             }
435 
436             --n_retry_rem;
437 
438             loc += n_items_read * size;
439 #if !defined(_WIN32) && defined(HAVE_UNISTD_H)
440             sleep(1);
441 #endif
442         }
443     } while (n_items_rem > 0);
444 
445     return num_items;
446 }
447 
448 
449 #ifdef _WIN32_WCE /* No stat() on WinCE */
450 int32
stat_retry(const char * file,struct stat * statbuf)451 stat_retry(const char *file, struct stat * statbuf)
452 {
453     WIN32_FIND_DATAW file_data;
454     HANDLE *h;
455     wchar_t *wfile;
456     size_t len;
457 
458     len = mbstowcs(NULL, file, 0) + 1;
459     wfile = ckd_calloc(len, sizeof(*wfile));
460     mbstowcs(wfile, file, len);
461     if ((h = FindFirstFileW(wfile, &file_data)) == INVALID_HANDLE_VALUE) {
462         ckd_free(wfile);
463         return -1;
464     }
465     ckd_free(wfile);
466     memset(statbuf, 0, sizeof(*statbuf));
467     statbuf->st_mtime = file_data.ftLastWriteTime.dwLowDateTime;
468     statbuf->st_size = file_data.nFileSizeLow;
469     FindClose(h);
470 
471     return 0;
472 }
473 
474 
475 int32
stat_mtime(const char * file)476 stat_mtime(const char *file)
477 {
478     struct stat statbuf;
479 
480     if (stat_retry(file, &statbuf) != 0)
481         return -1;
482 
483     return ((int32) statbuf.st_mtime);
484 }
485 #else
486 #define STAT_RETRY_COUNT	10
487 int32
stat_retry(const char * file,struct stat * statbuf)488 stat_retry(const char *file, struct stat * statbuf)
489 {
490     int32 i;
491 
492     for (i = 0; i < STAT_RETRY_COUNT; i++) {
493 #ifndef HAVE_SYS_STAT_H
494 	FILE *fp;
495 
496 	if ((fp = (FILE *)fopen(file, "r")) != 0) {
497 	    fseek(fp, 0, SEEK_END);
498 	    statbuf->st_size = ftell(fp);
499 	    fclose(fp);
500 	    return 0;
501 	}
502 #else /* HAVE_SYS_STAT_H */
503         if (stat(file, statbuf) == 0)
504             return 0;
505 #endif
506         if (i == 0) {
507             E_ERROR_SYSTEM("Failed to stat file '%s'; retrying...", file);
508         }
509 #ifdef HAVE_UNISTD_H
510         sleep(1);
511 #endif
512     }
513 
514     return -1;
515 }
516 
517 int32
stat_mtime(const char * file)518 stat_mtime(const char *file)
519 {
520     struct stat statbuf;
521 
522 #ifdef HAVE_SYS_STAT_H
523     if (stat(file, &statbuf) != 0)
524         return -1;
525 #else /* HAVE_SYS_STAT_H */
526     if (stat_retry(file, &statbuf) != 0)
527         return -1;
528 #endif /* HAVE_SYS_STAT_H */
529 
530     return ((int32) statbuf.st_mtime);
531 }
532 #endif /* !_WIN32_WCE */
533 
534 struct bit_encode_s {
535     FILE *fh;
536     unsigned char buf, bbits;
537     int16 refcount;
538 };
539 
540 bit_encode_t *
bit_encode_attach(FILE * outfh)541 bit_encode_attach(FILE *outfh)
542 {
543     bit_encode_t *be;
544 
545     be = (bit_encode_t *)ckd_calloc(1, sizeof(*be));
546     be->refcount = 1;
547     be->fh = outfh;
548     return be;
549 }
550 
551 bit_encode_t *
bit_encode_retain(bit_encode_t * be)552 bit_encode_retain(bit_encode_t *be)
553 {
554     ++be->refcount;
555     return be;
556 }
557 
558 int
bit_encode_free(bit_encode_t * be)559 bit_encode_free(bit_encode_t *be)
560 {
561     if (be == NULL)
562         return 0;
563     if (--be->refcount > 0)
564         return be->refcount;
565     ckd_free(be);
566 
567     return 0;
568 }
569 
570 int
bit_encode_write(bit_encode_t * be,unsigned char const * bits,int nbits)571 bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits)
572 {
573     int tbits;
574 
575     tbits = nbits + be->bbits;
576     if (tbits < 8)  {
577         /* Append to buffer. */
578         be->buf |= ((bits[0] >> (8 - nbits)) << (8 - tbits));
579     }
580     else {
581         int i = 0;
582         while (tbits >= 8) {
583             /* Shift bits out of the buffer and splice with high-order bits */
584             fputc(be->buf | ((bits[i]) >> be->bbits), be->fh);
585             /* Put low-order bits back into buffer */
586             be->buf = (bits[i] << (8 - be->bbits)) & 0xff;
587             tbits -= 8;
588             ++i;
589         }
590     }
591     /* tbits contains remaining number of  bits. */
592     be->bbits = tbits;
593 
594     return nbits;
595 }
596 
597 int
bit_encode_write_cw(bit_encode_t * be,uint32 codeword,int nbits)598 bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits)
599 {
600     unsigned char bits[4];
601     codeword <<= (32 - nbits);
602     bits[0] = (codeword >> 24) & 0xff;
603     bits[1] = (codeword >> 16) & 0xff;
604     bits[2] = (codeword >> 8) & 0xff;
605     bits[3] = codeword & 0xff;
606     return bit_encode_write(be, bits, nbits);
607 }
608 
609 int
bit_encode_flush(bit_encode_t * be)610 bit_encode_flush(bit_encode_t *be)
611 {
612     if (be->bbits) {
613         fputc(be->buf, be->fh);
614         be->bbits = 0;
615     }
616     return 0;
617 }
618 
/*
 * Create a single directory with the platform's mkdir primitive.
 * Returns 0 on success, -1 on failure with errno describing the cause.
 */
static int
build_directory_mkdir(const char *path)
{
#if defined(_WIN32) && !defined(CYGWIN)
    return _mkdir(path);
#elif defined(HAVE_SYS_STAT_H) /* Unix, Cygwin, doesn't work on MINGW */
    return mkdir(path, 0777);
#else
    /* No way to create directories here; clear errno so a stale value
     * is not mistaken for the result of this call. */
    (void)path;
    errno = 0;
    return -1;
#endif
}

/*
 * Create directory `path`, creating missing parent directories first.
 *
 * Returns 0 if the directory was created or something already exists at
 * that path, -1 on failure (empty path, or a directory that could not be
 * created; the latter is logged).
 */
int
build_directory(const char *path)
{
    char *parent;
    int rv;

    /* Utterly failed... */
    if (path[0] == '\0')
        return -1;

    /* Created it, or it already exists... */
    if (build_directory_mkdir(path) == 0 || errno == EEXIST)
        return 0;

    if (errno != ENOENT) {
        E_ERROR_SYSTEM("Failed to create %s", path);
        return -1;
    }

    /* A component of the path is missing: build the parent first. */
    parent = ckd_salloc(path);
    path2dirname(path, parent);
    if (strcmp(parent, path) == 0) {
        /* No shorter prefix left to try; avoid recursing forever. */
        rv = -1;
    }
    else {
        rv = build_directory(parent);
    }
    ckd_free(parent);
    if (rv != 0)
        return -1;

    /* The parent now exists.  EEXIST still counts as success here, e.g.
     * for a path with a trailing separator whose parent is the directory
     * itself, or when someone else created it in the meantime. */
    if (build_directory_mkdir(path) == 0 || errno == EEXIST)
        return 0;

    E_ERROR_SYSTEM("Failed to create %s", path);
    return -1;
}
656