1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 /* ==================================================================== 3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 4 * reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * This work was supported in part by funding from the Defense Advanced 19 * Research Projects Agency and the National Science Foundation of the 20 * United States of America, and the CMU Sphinx Speech Consortium. 21 * 22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 * 34 * ==================================================================== 35 * 36 */ 37 /* 38 * ad.h -- generic live audio interface for recording and playback 39 * 40 * ********************************************** 41 * CMU ARPA Speech Project 42 * 43 * Copyright (c) 1996 Carnegie Mellon University. 44 * ALL RIGHTS RESERVED. 45 * ********************************************** 46 * 47 * HISTORY 48 * 49 * $Log: ad.h,v $ 50 * Revision 1.8 2005/06/22 08:00:06 arthchan2003 51 * Completed all doxygen documentation on file description for libs3decoder/libutil/libs3audio and programs. 52 * 53 * Revision 1.7 2004/12/14 00:39:49 arthchan2003 54 * add <s3types.h> to the code, change some comments to doxygen style 55 * 56 * Revision 1.6 2004/12/06 11:17:55 arthchan2003 57 * Update the copyright information of ad.h, *sigh* start to feel tired of updating documentation system. Anyone who has time, please take up libs3audio. That is the last place which is undocumented 58 * 59 * Revision 1.5 2004/07/23 23:44:46 egouvea 60 * Changed the cygwin code to use the same audio files as the MS Visual code, removed unused variables from fe_interface.c 61 * 62 * Revision 1.4 2004/02/29 23:48:31 egouvea 63 * Updated configure.in to the recent automake/autoconf, fixed win32 64 * references in audio files. 65 * 66 * Revision 1.3 2002/11/10 19:27:38 egouvea 67 * Fixed references to sun's implementation of audio interface, 68 * referring to the correct .h file, and replacing sun4 with sunos. 69 * 70 * Revision 1.2 2001/12/11 04:40:55 lenzo 71 * License cleanup. 72 * 73 * Revision 1.1.1.1 2001/12/03 16:01:45 egouvea 74 * Initial import of sphinx3 75 * 76 * Revision 1.1.1.1 2001/01/17 05:17:14 ricky 77 * Initial Import of the s3.3 decoder, has working decodeaudiofile, s3.3_live 78 * 79 * 80 * 19-Jan-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 81 * Added AD_ return codes. Added ad_open_sps_bufsize(), and 82 * ad_rec_t.n_buf. 83 * 84 * 17-Apr-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 85 * Added ad_open_play_sps(). 86 * 87 * 07-Mar-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 88 * Added ad_open_sps(). 89 * 90 * 10-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 91 * Added ad_wbuf_t, ad_rec_t, and ad_play_t types, and augmented all 92 * recording functions with ad_rec_t, and playback functions with 93 * ad_play_t. 94 * 95 * 06-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 96 * Created. 97 */ 98 99 /** \file ad.h 100 * \brief generic live audio interface for recording and playback 101 */ 102 103 #ifndef _AD_H_ 104 #define _AD_H_ 105 106 #include <sphinx_config.h> 107 108 #if defined (__CYGWIN__) 109 #include <w32api/windows.h> 110 #include <w32api/mmsystem.h> 111 #elif (defined(WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE) 112 #include <windows.h> 113 #include <mmsystem.h> 114 #elif defined(AD_BACKEND_JACK) 115 #include <jack/jack.h> 116 #include <jack/ringbuffer.h> 117 #ifdef HAVE_SAMPLERATE_H 118 #include <samplerate.h> 119 #endif 120 #elif defined(AD_BACKEND_PULSEAUDIO) 121 #include <pulse/pulseaudio.h> 122 #include <pulse/simple.h> 123 #elif defined(AD_BACKEND_ALSA) 124 #include <alsa/asoundlib.h> 125 #endif 126 127 /* Win32/WinCE DLL gunk */ 128 #include <sphinxbase/sphinxbase_export.h> 129 130 #include <sphinxbase/prim_type.h> 131 132 #ifdef __cplusplus 133 extern "C" { 134 #endif 135 #if 0 136 /* Fool Emacs. */ 137 } 138 #endif 139 140 #define AD_SAMPLE_SIZE (sizeof(int16)) 141 #define DEFAULT_SAMPLES_PER_SEC 16000 142 143 /* Return codes */ 144 #define AD_OK 0 145 #define AD_EOF -1 146 #define AD_ERR_GEN -1 147 #define AD_ERR_NOT_OPEN -2 148 #define AD_ERR_WAVE -3 149 150 151 #if (defined(WIN32) || defined(AD_BACKEND_WIN32)) && !defined(GNUWINCE) 152 typedef struct { 153 HGLOBAL h_whdr; 154 LPWAVEHDR p_whdr; 155 HGLOBAL h_buf; 156 LPSTR p_buf; 157 } ad_wbuf_t; 158 #endif 159 160 161 /* ------------ RECORDING -------------- */ 162 163 /* 164 * NOTE: ad_rec_t and ad_play_t are READ-ONLY structures for the user. 165 */ 166 167 #if (defined(WIN32) || defined(AD_BACKEND_WIN32)) && !defined(GNUWINCE) 168 169 #define DEFAULT_DEVICE (char*)DEV_MAPPER 170 171 /** 172 * Audio recording structure. 173 */ 174 typedef struct ad_rec_s { 175 HWAVEIN h_wavein; /* "HANDLE" to the audio input device */ 176 ad_wbuf_t *wi_buf; /* Recording buffers provided to system */ 177 int32 n_buf; /* #Recording buffers provided to system */ 178 int32 opened; /* Flag; A/D opened for recording */ 179 int32 recording; 180 int32 curbuf; /* Current buffer with data for application */ 181 int32 curoff; /* Start of data for application in curbuf */ 182 int32 curlen; /* #samples of data from curoff in curbuf */ 183 int32 lastbuf; /* Last buffer containing data after recording stopped */ 184 int32 sps; /* Samples/sec */ 185 int32 bps; /* Bytes/sample */ 186 } ad_rec_t; 187 188 #elif defined(AD_BACKEND_OSS) 189 190 #define DEFAULT_DEVICE "/dev/dsp" 191 192 /** \struct ad_rec_t 193 * \brief Audio recording structure. 194 */ 195 typedef struct { 196 int32 dspFD; /* Audio device descriptor */ 197 int32 recording; 198 int32 sps; /* Samples/sec */ 199 int32 bps; /* Bytes/sample */ 200 } ad_rec_t; 201 202 #elif defined(AD_BACKEND_PULSEAUDIO) 203 204 #define DEFAULT_DEVICE NULL 205 206 typedef struct { 207 pa_simple* pa; 208 int32 recording; 209 int32 sps; 210 int32 bps; 211 } ad_rec_t; 212 213 #elif defined(AD_BACKEND_ALSA) 214 215 #define DEFAULT_DEVICE "default" 216 typedef struct { 217 snd_pcm_t *dspH; 218 int32 recording; 219 int32 sps; 220 int32 bps; 221 } ad_rec_t; 222 223 #elif defined(AD_BACKEND_JACK) 224 225 typedef struct { 226 jack_client_t *client; 227 jack_port_t *input_port; 228 jack_port_t *output_port; 229 jack_ringbuffer_t* rbuffer; 230 jack_default_audio_sample_t* sample_buffer; 231 int32 recording; 232 int32 sps; 233 int32 bps; 234 #ifdef HAVE_SAMPLERATE_H 235 SRC_STATE *resample_state; 236 jack_default_audio_sample_t *resample_buffer; 237 #endif 238 } ad_rec_t; 239 240 #elif defined(AD_BACKEND_S60) 241 242 typedef struct ad_rec_s { 243 void* recorder; 244 int32 recording; 245 int32 sps; 246 int32 bps; 247 } ad_rec_t; 248 249 SPHINXBASE_EXPORT 250 ad_rec_t *ad_open_sps_bufsize (int32 samples_per_sec, int32 bufsize_msec); 251 252 #else 253 254 #define DEFAULT_DEVICE NULL 255 typedef struct { 256 int32 sps; /**< Samples/sec */ 257 int32 bps; /**< Bytes/sample */ 258 } ad_rec_t; 259 260 261 #endif 262 263 264 /** 265 * Open a specific audio device for recording. 266 * 267 * The device is opened in non-blocking mode and placed in idle state. 268 * 269 * @return pointer to read-only ad_rec_t structure if successful, NULL 270 * otherwise. The return value to be used as the first argument to 271 * other recording functions. 272 */ 273 SPHINXBASE_EXPORT 274 ad_rec_t *ad_open_dev ( 275 const char *dev, /**< Device name (platform-specific) */ 276 int32 samples_per_sec /**< Samples per second */ 277 ); 278 279 /** 280 * Open the default audio device with a given sampling rate. 281 */ 282 SPHINXBASE_EXPORT 283 ad_rec_t *ad_open_sps ( 284 int32 samples_per_sec /**< Samples per second */ 285 ); 286 287 288 /** 289 * Open the default audio device. 290 */ 291 SPHINXBASE_EXPORT 292 ad_rec_t *ad_open ( void ); 293 294 295 #if defined(WIN32) && !defined(GNUWINCE) 296 /* 297 * Like ad_open_sps but specifies buffering required within driver. This function is 298 * useful if the default (5000 msec worth) is too small and results in loss of data. 299 */ 300 SPHINXBASE_EXPORT 301 ad_rec_t *ad_open_sps_bufsize (int32 samples_per_sec, int32 bufsize_msec); 302 #endif 303 304 305 /* Start audio recording. Return value: 0 if successful, <0 otherwise */ 306 SPHINXBASE_EXPORT 307 int32 ad_start_rec (ad_rec_t *); 308 309 310 /* Stop audio recording. Return value: 0 if successful, <0 otherwise */ 311 SPHINXBASE_EXPORT 312 int32 ad_stop_rec (ad_rec_t *); 313 314 315 /* Close the recording device. Return value: 0 if successful, <0 otherwise */ 316 SPHINXBASE_EXPORT 317 int32 ad_close (ad_rec_t *); 318 319 320 /* 321 * Read next block of audio samples while recording; read upto max samples into buf. 322 * Return value: # samples actually read (could be 0 since non-blocking); -1 if not 323 * recording and no more samples remaining to be read from most recent recording. 324 */ 325 SPHINXBASE_EXPORT 326 int32 ad_read (ad_rec_t *, int16 *buf, int32 max); 327 328 329 /* ------ PLAYBACK; SIMILAR TO RECORDING ------- */ 330 331 #if defined(WIN32) && !defined(GNUWINCE) 332 333 typedef struct { 334 HWAVEOUT h_waveout; /* "HANDLE" to the audio output device */ 335 ad_wbuf_t *wo_buf; /* Playback buffers given to the system */ 336 int32 opened; /* Flag; A/D opened for playback */ 337 int32 playing; 338 char *busy; /* flags [N_WO_BUF] indicating whether given to system */ 339 int32 nxtbuf; /* Next buffer [0..N_WO_BUF-1] to be used for playback data */ 340 int32 sps; /* Samples/sec */ 341 int32 bps; /* Bytes/sample */ 342 } ad_play_t; 343 344 #else 345 346 typedef struct { 347 int32 sps; /* Samples/sec */ 348 int32 bps; /* Bytes/sample */ 349 } ad_play_t; /* Dummy definition for systems without A/D stuff */ 350 351 #endif 352 353 354 SPHINXBASE_EXPORT 355 ad_play_t *ad_open_play_sps (int32 samples_per_sec); 356 357 SPHINXBASE_EXPORT 358 ad_play_t *ad_open_play ( void ); 359 360 SPHINXBASE_EXPORT 361 int32 ad_start_play (ad_play_t *); 362 363 SPHINXBASE_EXPORT 364 int32 ad_stop_play (ad_play_t *); 365 366 SPHINXBASE_EXPORT 367 int32 ad_close_play (ad_play_t *); 368 369 370 /** 371 * Queue a block of audio samples for playback. 372 * 373 * Write the next block of [len] samples from rawbuf to the A/D device for playback. 374 * The device may queue less than len samples, possibly 0, since it is non-blocking. 375 * The application should resubmit the remaining data to be played. 376 * Return value: # samples accepted for playback; -1 if error. 377 */ 378 SPHINXBASE_EXPORT 379 int32 ad_write (ad_play_t *, int16 *buf, int32 len); 380 381 382 /* ------ MISCELLANEOUS ------- */ 383 384 /** 385 * Convert mu-law data to int16 linear PCM format. 386 */ 387 SPHINXBASE_EXPORT 388 void ad_mu2li (int16 *outbuf, /* Out: PCM data placed here (allocated by user) */ 389 unsigned char *inbuf, /* In: Input buffer with mulaw data */ 390 int32 n_samp); /* In: #Samples in inbuf */ 391 392 #ifdef __cplusplus 393 } 394 #endif 395 396 397 #endif 398