1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * bio.h -- Sphinx-3 binary file I/O functions.
39  *
40  * **********************************************
41  * CMU ARPA Speech Project
42  *
43  * Copyright (c) 1996 Carnegie Mellon University.
44  * ALL RIGHTS RESERVED.
45  * **********************************************
46  *
47  * HISTORY
48  * $Log: bio.h,v $
49  * Revision 1.8  2005/06/21 20:40:46  arthchan2003
50  * 1, Fixed doxygen documentation, 2, Add the $ keyword.
51  *
52  * Revision 1.5  2005/06/13 04:02:57  archan
53  * Fixed most doxygen-style documentation under libs3decoder.
54  *
55  * Revision 1.4  2005/05/10 21:21:52  archan
56  * Three functionalities added but not tested. Code on 1) addition/deletion of LM in mode 4. 2) reading text-based LM 3) Converting txt-based LM to dmp-based LM.
57  *
58  * Revision 1.3  2005/03/30 01:22:46  archan
59  * Fixed mistakes in last updates. Add
60  *
61  *
62  * 28-Apr-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
63  * 		Created.
64  */
65 
66 
67 #ifndef _S3_BIO_H_
68 #define _S3_BIO_H_
69 
70 #include <stdio.h>
71 #include <stdarg.h>
72 
73 /* Win32/WinCE DLL gunk */
74 #include <sphinxbase/sphinxbase_export.h>
75 #include <sphinxbase/prim_type.h>
76 #include <sphinxbase/byteorder.h>
77 
78 /** \file bio.h
79  * \brief Cross platform binary IO to process files in sphinx3 format.
80  *
81  *
82  */
83 
84 #ifdef __cplusplus
85 extern "C" {
86 #endif
87 #if 0
88 /* Fool Emacs. */
89 }
90 #endif
91 
/** Byte-order magic word.  Presumably the 4-byte byte-order word that
    follows "endhdr" in the file header (see bio_readhdr) -- confirm
    against the implementation. */
#define BYTE_ORDER_MAGIC	(0x11223344)

/**
 * "Reversed senses" SWAP macros.  ARCHAN: These are still incorporated in
 * Sphinx 3 because lm3g2dmp used them.  Don't think that I am very happy
 * with it.
 *
 * When __BIG_ENDIAN__ is non-zero each macro reverses the byte order of
 * the low 16 (resp. 32) bits of x and stores the result back into x;
 * otherwise both macros expand to nothing.
 *
 * x must be a modifiable lvalue and is evaluated several times, so it must
 * not have side effects.  The shifts are carried out on an unsigned value
 * because left-shifting a negative signed integer is undefined behaviour.
 */
#if (__BIG_ENDIAN__)
#define REVERSE_SENSE_SWAP_INT16(x) \
    ((x) = ( ((((unsigned long)(x)) << 8) & 0x0000ff00UL) | \
             ((((unsigned long)(x)) >> 8) & 0x000000ffUL) ))
#define REVERSE_SENSE_SWAP_INT32(x) \
    ((x) = ( ((((unsigned long)(x)) << 24) & 0xff000000UL) | \
             ((((unsigned long)(x)) <<  8) & 0x00ff0000UL) | \
             ((((unsigned long)(x)) >>  8) & 0x0000ff00UL) | \
             ((((unsigned long)(x)) >> 24) & 0x000000ffUL) ))
#else
/* Host is not big-endian: nothing to do. */
#define REVERSE_SENSE_SWAP_INT16(x)
#define REVERSE_SENSE_SWAP_INT32(x)
#endif
107 
108 
109 
110 /**
111  * Read binary file format header: has the following format
112  * <pre>
113  *     s3
114  *     <argument-name> <argument-value>
115  *     <argument-name> <argument-value>
116  *     ...
117  *     endhdr
118  *     4-byte byte-order word used to find file byte ordering relative to host machine.
119  * </pre>
120  * Lines beginning with # are ignored.
121  * Memory for name and val allocated by this function; use bio_hdrarg_free to free them.
122  * @return: 0 if successful, -1 otherwise.
123  */
124 SPHINXBASE_EXPORT
125 int32 bio_readhdr (FILE *fp,		/**< In: File to read */
126 		   char ***name,	/**< Out: array of argument name strings read */
127 		   char ***val,		/**< Out: corresponding value strings read */
128 		   int32 *swap	/**< Out: file needs byteswapping iff (*swap) */
129 		   );
130 /**
131  * Write a simple binary file header, containing only the version string.  Also write
132  * the byte order magic word.
133  * @return: 0 if successful, -1 otherwise.
134  */
135 SPHINXBASE_EXPORT
136 int32 bio_writehdr_version (FILE *fp,  /**< Output: File to write */
137 			    char *version /**< Input: A string of version */
138 	);
139 
140 
141 SPHINXBASE_EXPORT
142 int32 bio_writehdr(FILE *fp, ...);
143 
144 /**
145  * Free name and value strings previously allocated and returned by bio_readhdr.
146  */
147 SPHINXBASE_EXPORT
148 void bio_hdrarg_free (char **name,	/**< In: Array previously returned by bio_readhdr */
149 		      char **val	/**< In: Array previously returned by bio_readhdr */
150 		      );
151 
152 /**
153  * Like fread but perform byteswapping and accumulate checksum (the 2 extra arguments).
154  * But unlike fread, returns -1 if required number of elements (n_el) not read; also,
155  * no byteswapping or checksum accumulation is performed in that case.
156  */
157 SPHINXBASE_EXPORT
158 int32 bio_fread (void *buf,
159 		 int32 el_sz,
160 		 int32 n_el,
161 		 FILE *fp,              /**< In: An input file pointer */
162 		 int32 swap,		/**< In: Byteswap iff (swap != 0) */
163 		 uint32 *chksum	/**< In/Out: Accumulated checksum */
164 		 );
165 
166 /**
167  * Like fwrite but perform byteswapping and accumulate checksum (the 2 extra arguments).
168  * @return the number of elemens written (like fwrite).
169  */
170 SPHINXBASE_EXPORT
171 int32 bio_fwrite(void *buf,
172 		 int32 el_sz,
173 		 int32 n_el,
174 		 FILE *fp,              /**< In: An input file pointer */
175 		 int32 swap,		/**< In: Byteswap iff (swap != 0) */
176 		 uint32 *chksum	/**< In/Out: Accumulated checksum */
177 		 );
178 
179 /**
180  * Read a 1-d array (fashioned after fread):
181  *
182  *  - 4-byte array size (returned in n_el)
183  *  - memory allocated for the array and read (returned in buf)
184  *
185  * Byteswapping and checksum accumulation performed as necessary.
186  * Fails fatally if expected data not read.
187  * Return value: number of array elements allocated and read; -1 if error.
188  */
189 SPHINXBASE_EXPORT
190 int32 bio_fread_1d (void **buf,		/**< Out: contains array data; allocated by this
191 					   function; can be freed using ckd_free */
192 		    size_t el_sz,	/**< In: Array element size */
193 		    uint32 *n_el,	/**< Out: Number of array elements allocated/read */
194 		    FILE *fp,		/**< In: File to read */
195 		    int32 sw,		/**< In: Byteswap iff (swap != 0) */
196 		    uint32 *ck	/**< In/Out: Accumulated checksum */
197 		    );
198 
199 /**
200  * Read a 2-d matrix:
201  *
202  * - 4-byte # rows, # columns (returned in d1, d2, d3)
203  * - memory allocated for the array and read (returned in buf)
204  *
205  * Byteswapping and checksum accumulation performed as necessary.
206  * Fails fatally if expected data not read.
207  * Return value: number of array elements allocated and read; -1 if error.
208  */
209 SPHINXBASE_EXPORT
210 int32 bio_fread_2d(void ***arr,
211                    size_t e_sz,
212                    uint32 *d1,
213                    uint32 *d2,
214                    FILE *fp,
215                    uint32 swap,
216                    uint32 *chksum);
217 
218 /**
219  * Read a 3-d array (set of matrices)
220  *
221  * - 4-byte # matrices, # rows, # columns (returned in d1, d2, d3)
222  * - memory allocated for the array and read (returned in buf)
223  *
224  * Byteswapping and checksum accumulation performed as necessary.
225  * Fails fatally if expected data not read.
226  * Return value: number of array elements allocated and read; -1 if error.
227  */
228 SPHINXBASE_EXPORT
229 int32 bio_fread_3d(void ****arr,
230                    size_t e_sz,
231                    uint32 *d1,
232                    uint32 *d2,
233                    uint32 *d3,
234                    FILE *fp,
235                    uint32 swap,
236                    uint32 *chksum);
237 
238 /**
239  * Read and verify checksum at the end of binary file.  Fails fatally if there is
240  * a mismatch.
241  */
242 SPHINXBASE_EXPORT
243 void bio_verify_chksum (FILE *fp,	/**< In: File to read */
244 			int32 byteswap,	/**< In: Byteswap iff (swap != 0) */
245 			uint32 chksum	/**< In: Value to compare with checksum in file */
246 			);
247 
248 
249 /**
250  * Read raw data from the wav file.
251  *
252  * @param directory the folder where the file is located
253  * @param filename the name of the file
254  * @param extension file extension
255  * @param header the size of the header to skip usually 44 bytes.
256  * @param endian Endian of the data
257  * @param nsamps number of samples read
258  * @return pointer to the data
259  */
260 SPHINXBASE_EXPORT
261 int16* bio_read_wavfile(char const *directory,
262 			char const *filename,
263 			char const *extension,
264 			int32 header,
265 			int32 endian,
266 			int32 *nsamps);
267 
268 #ifdef __cplusplus
269 }
270 #endif
271 
272 #endif
273