1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * ad.h -- generic live audio interface for recording and playback
39  *
40  * **********************************************
41  * CMU ARPA Speech Project
42  *
43  * Copyright (c) 1996 Carnegie Mellon University.
44  * ALL RIGHTS RESERVED.
45  * **********************************************
46  *
47  * HISTORY
48  *
49  * $Log: ad.h,v $
50  * Revision 1.8  2005/06/22 08:00:06  arthchan2003
51  * Completed all doxygen documentation on file description for libs3decoder/libutil/libs3audio and programs.
52  *
53  * Revision 1.7  2004/12/14 00:39:49  arthchan2003
54  * add <s3types.h> to the code, change some comments to doxygen style
55  *
56  * Revision 1.6  2004/12/06 11:17:55  arthchan2003
57  * Update the copyright information of ad.h, *sigh* start to feel tired of updating documentation system.  Anyone who has time, please take up libs3audio. That is the last place which is undocumented
58  *
59  * Revision 1.5  2004/07/23 23:44:46  egouvea
60  * Changed the cygwin code to use the same audio files as the MS Visual code, removed unused variables from fe_interface.c
61  *
62  * Revision 1.4  2004/02/29 23:48:31  egouvea
63  * Updated configure.in to the recent automake/autoconf, fixed win32
64  * references in audio files.
65  *
66  * Revision 1.3  2002/11/10 19:27:38  egouvea
67  * Fixed references to sun's implementation of audio interface,
68  * referring to the correct .h file, and replacing sun4 with sunos.
69  *
70  * Revision 1.2  2001/12/11 04:40:55  lenzo
71  * License cleanup.
72  *
73  * Revision 1.1.1.1  2001/12/03 16:01:45  egouvea
74  * Initial import of sphinx3
75  *
76  * Revision 1.1.1.1  2001/01/17 05:17:14  ricky
77  * Initial Import of the s3.3 decoder, has working decodeaudiofile, s3.3_live
78  *
79  *
80  * 19-Jan-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
81  * 		Added AD_ return codes.  Added ad_open_sps_bufsize(), and
82  * 		ad_rec_t.n_buf.
83  *
84  * 17-Apr-98	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
85  * 		Added ad_open_play_sps().
86  *
87  * 07-Mar-98	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
88  * 		Added ad_open_sps().
89  *
90  * 10-Jun-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
91  * 		Added ad_wbuf_t, ad_rec_t, and ad_play_t types, and augmented all
92  * 		recording functions with ad_rec_t, and playback functions with
93  * 		ad_play_t.
94  *
95  * 06-Jun-96	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
96  *		Created.
97  */
98 
99 /** \file ad.h
100  * \brief generic live audio interface for recording and playback
101  */
102 
103 #ifndef _AD_H_
104 #define _AD_H_
105 
106 #include <sphinx_config.h>
107 
108 #if defined (__CYGWIN__)
109 #include <w32api/windows.h>
110 #include <w32api/mmsystem.h>
111 #elif (defined(WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
112 #include <windows.h>
113 #include <mmsystem.h>
114 #elif defined(AD_BACKEND_JACK)
115 #include <jack/jack.h>
116 #include <jack/ringbuffer.h>
117 #ifdef HAVE_SAMPLERATE_H
118 #include <samplerate.h>
119 #endif
120 #elif defined(AD_BACKEND_PULSEAUDIO)
121 #include <pulse/pulseaudio.h>
122 #include <pulse/simple.h>
123 #elif defined(AD_BACKEND_ALSA)
124 #include <alsa/asoundlib.h>
125 #endif
126 
127 /* Win32/WinCE DLL gunk */
128 #include <sphinxbase/sphinxbase_export.h>
129 
130 #include <sphinxbase/prim_type.h>
131 
132 #ifdef __cplusplus
133 extern "C" {
134 #endif
135 #if 0
136 /* Fool Emacs. */
137 }
138 #endif
139 
/** Size in bytes of one audio sample (samples are int16). */
#define AD_SAMPLE_SIZE		(sizeof(int16))
/** Default sampling rate, in samples per second. */
#define DEFAULT_SAMPLES_PER_SEC	16000

/*
 * Return codes.
 *
 * Negative values are parenthesized so that each macro expands to a single
 * primary expression wherever it is used.  Note that AD_EOF and AD_ERR_GEN
 * share the value -1, so callers cannot tell them apart by value alone.
 */
#define AD_OK		0
#define AD_EOF		(-1)
#define AD_ERR_GEN	(-1)
#define AD_ERR_NOT_OPEN	(-2)
#define AD_ERR_WAVE	(-3)
149 
150 
#if (defined(WIN32) || defined(AD_BACKEND_WIN32)) && !defined(GNUWINCE)
/**
 * Wave buffer record for the Win32 backend: one wave header and one data
 * buffer, each stored as a global-memory handle together with a pointer.
 */
typedef struct {
    HGLOBAL   h_whdr;	/**< Handle for the wave header */
    LPWAVEHDR p_whdr;	/**< Pointer to the wave header */
    HGLOBAL   h_buf;	/**< Handle for the data buffer */
    LPSTR     p_buf;	/**< Pointer to the data buffer */
} ad_wbuf_t;
#endif /* Win32 backend */
159 
160 
161 /* ------------ RECORDING -------------- */
162 
163 /*
164  * NOTE: ad_rec_t and ad_play_t are READ-ONLY structures for the user.
165  */
166 
167 #if (defined(WIN32) || defined(AD_BACKEND_WIN32)) && !defined(GNUWINCE)
168 
169 #define DEFAULT_DEVICE (char*)DEV_MAPPER
170 
171 /**
172  * Audio recording structure.
173  */
174 typedef struct ad_rec_s {
175     HWAVEIN h_wavein;	/* "HANDLE" to the audio input device */
176     ad_wbuf_t *wi_buf;	/* Recording buffers provided to system */
177     int32 n_buf;	/* #Recording buffers provided to system */
178     int32 opened;	/* Flag; A/D opened for recording */
179     int32 recording;
180     int32 curbuf;	/* Current buffer with data for application */
181     int32 curoff;	/* Start of data for application in curbuf */
182     int32 curlen;	/* #samples of data from curoff in curbuf */
183     int32 lastbuf;	/* Last buffer containing data after recording stopped */
184     int32 sps;		/* Samples/sec */
185     int32 bps;		/* Bytes/sample */
186 } ad_rec_t;
187 
188 #elif defined(AD_BACKEND_OSS)
189 
190 #define DEFAULT_DEVICE "/dev/dsp"
191 
192 /** \struct ad_rec_t
193  *  \brief Audio recording structure.
194  */
195 typedef struct {
196     int32 dspFD;	/* Audio device descriptor */
197     int32 recording;
198     int32 sps;		/* Samples/sec */
199     int32 bps;		/* Bytes/sample */
200 } ad_rec_t;
201 
202 #elif defined(AD_BACKEND_PULSEAUDIO)
203 
204 #define DEFAULT_DEVICE NULL
205 
206 typedef struct {
207     pa_simple* pa;
208     int32 recording;
209     int32 sps;
210     int32 bps;
211 } ad_rec_t;
212 
213 #elif defined(AD_BACKEND_ALSA)
214 
215 #define DEFAULT_DEVICE "default"
216 typedef struct {
217     snd_pcm_t *dspH;
218     int32 recording;
219     int32 sps;
220     int32 bps;
221 } ad_rec_t;
222 
223 #elif defined(AD_BACKEND_JACK)
224 
225 typedef struct {
226     jack_client_t *client;
227     jack_port_t *input_port;
228     jack_port_t *output_port;
229     jack_ringbuffer_t* rbuffer;
230     jack_default_audio_sample_t* sample_buffer;
231     int32 recording;
232     int32 sps;
233     int32 bps;
234 #ifdef HAVE_SAMPLERATE_H
235     SRC_STATE *resample_state;
236     jack_default_audio_sample_t *resample_buffer;
237 #endif
238 } ad_rec_t;
239 
240 #elif defined(AD_BACKEND_S60)
241 
242 typedef struct ad_rec_s {
243     void* recorder;
244     int32 recording;
245     int32 sps;
246     int32 bps;
247 } ad_rec_t;
248 
249 SPHINXBASE_EXPORT
250 ad_rec_t *ad_open_sps_bufsize (int32 samples_per_sec, int32 bufsize_msec);
251 
252 #else
253 
254 #define DEFAULT_DEVICE NULL
255 typedef struct {
256     int32 sps;		/**< Samples/sec */
257     int32 bps;		/**< Bytes/sample */
258 } ad_rec_t;
259 
260 
261 #endif
262 
263 
264 /**
265  * Open a specific audio device for recording.
266  *
267  * The device is opened in non-blocking mode and placed in idle state.
268  *
269  * @return pointer to read-only ad_rec_t structure if successful, NULL
270  * otherwise.  The return value to be used as the first argument to
271  * other recording functions.
272  */
273 SPHINXBASE_EXPORT
274 ad_rec_t *ad_open_dev (
275 	const char *dev, /**< Device name (platform-specific) */
276 	int32 samples_per_sec /**< Samples per second */
277 	);
278 
279 /**
280  * Open the default audio device with a given sampling rate.
281  */
282 SPHINXBASE_EXPORT
283 ad_rec_t *ad_open_sps (
284 		       int32 samples_per_sec /**< Samples per second */
285 		       );
286 
287 
288 /**
289  * Open the default audio device.
290  */
291 SPHINXBASE_EXPORT
292 ad_rec_t *ad_open ( void );
293 
294 
/*
 * Use the same condition that selects ad_wbuf_t and the Win32 ad_rec_t, so
 * that a build identified only by AD_BACKEND_WIN32 also sees this prototype.
 */
#if (defined(WIN32) || defined(AD_BACKEND_WIN32)) && !defined(GNUWINCE)
/**
 * Like ad_open_sps() but specifies the buffering required within the driver.
 *
 * Useful if the default (5000 msec worth) is too small and results in loss
 * of data.
 *
 * @param samples_per_sec Samples per second
 * @param bufsize_msec    Amount of driver buffering, in milliseconds
 */
SPHINXBASE_EXPORT
ad_rec_t *ad_open_sps_bufsize (int32 samples_per_sec, int32 bufsize_msec);
#endif
303 
304 
305 /* Start audio recording.  Return value: 0 if successful, <0 otherwise */
306 SPHINXBASE_EXPORT
307 int32 ad_start_rec (ad_rec_t *);
308 
309 
310 /* Stop audio recording.  Return value: 0 if successful, <0 otherwise */
311 SPHINXBASE_EXPORT
312 int32 ad_stop_rec (ad_rec_t *);
313 
314 
315 /* Close the recording device.  Return value: 0 if successful, <0 otherwise */
316 SPHINXBASE_EXPORT
317 int32 ad_close (ad_rec_t *);
318 
319 
320 /*
321  * Read next block of audio samples while recording; read upto max samples into buf.
322  * Return value: # samples actually read (could be 0 since non-blocking); -1 if not
323  * recording and no more samples remaining to be read from most recent recording.
324  */
325 SPHINXBASE_EXPORT
326 int32 ad_read (ad_rec_t *, int16 *buf, int32 max);
327 
328 
329 /* ------ PLAYBACK; SIMILAR TO RECORDING ------- */
330 
331 #if defined(WIN32) && !defined(GNUWINCE)
332 
333 typedef struct {
334     HWAVEOUT h_waveout;	/* "HANDLE" to the audio output device */
335     ad_wbuf_t *wo_buf;	/* Playback buffers given to the system */
336     int32 opened;	/* Flag; A/D opened for playback */
337     int32 playing;
338     char *busy;		/* flags [N_WO_BUF] indicating whether given to system */
339     int32 nxtbuf;	/* Next buffer [0..N_WO_BUF-1] to be used for playback data */
340     int32 sps;		/* Samples/sec */
341     int32 bps;		/* Bytes/sample */
342 } ad_play_t;
343 
344 #else
345 
346 typedef struct {
347     int32 sps;		/* Samples/sec */
348     int32 bps;		/* Bytes/sample */
349 } ad_play_t;	/* Dummy definition for systems without A/D stuff */
350 
351 #endif
352 
353 
354 SPHINXBASE_EXPORT
355 ad_play_t *ad_open_play_sps (int32 samples_per_sec);
356 
357 SPHINXBASE_EXPORT
358 ad_play_t *ad_open_play ( void );
359 
360 SPHINXBASE_EXPORT
361 int32 ad_start_play (ad_play_t *);
362 
363 SPHINXBASE_EXPORT
364 int32 ad_stop_play (ad_play_t *);
365 
366 SPHINXBASE_EXPORT
367 int32 ad_close_play (ad_play_t *);
368 
369 
370 /**
371  * Queue a block of audio samples for playback.
372  *
373  * Write the next block of [len] samples from rawbuf to the A/D device for playback.
374  * The device may queue less than len samples, possibly 0, since it is non-blocking.
375  * The application should resubmit the remaining data to be played.
376  * Return value: # samples accepted for playback; -1 if error.
377  */
378 SPHINXBASE_EXPORT
379 int32 ad_write (ad_play_t *, int16 *buf, int32 len);
380 
381 
382 /* ------ MISCELLANEOUS ------- */
383 
384 /**
385  * Convert mu-law data to int16 linear PCM format.
386  */
387 SPHINXBASE_EXPORT
388 void ad_mu2li (int16 *outbuf,		/* Out: PCM data placed here (allocated by user) */
389 	       unsigned char *inbuf,	/* In: Input buffer with mulaw data */
390 	       int32 n_samp);		/* In: #Samples in inbuf */
391 
392 #ifdef __cplusplus
393 }
394 #endif
395 
396 
397 #endif
398