1 /* 2 * Copyright (c) 2003 Orion Hodson <orion@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * MAINTAINER: Orion Hodson <orion@freebsd.org> 27 * 28 * This rate conversion code uses linear interpolation without any 29 * pre- or post- interpolation filtering to combat aliasing. This 30 * greatly limits the sound quality and should be addressed at some 31 * stage in the future. 32 * 33 * Since this accuracy of interpolation is sensitive and examination 34 * of the algorithm output is harder from the kernel, th code is 35 * designed to be compiled in the kernel and in a userland test 36 * harness. This is done by selectively including and excluding code 37 * with several portions based on whether _KERNEL is defined. It's a 38 * little ugly, but exceedingly useful. The testsuite and its 39 * revisions can be found at: 40 * http://people.freebsd.org/~orion/feedrate/ 41 * 42 * Special thanks to Ken Marx for exposing flaws in the code and for 43 * testing revisions. 44 * 45 * $FreeBSD: src/sys/dev/sound/pcm/feeder_rate.c,v 1.2.2.3 2003/02/08 02:38:21 orion Exp $ 46 * $DragonFly: src/sys/dev/sound/pcm/feeder_rate.c,v 1.2 2003/06/17 04:28:31 dillon Exp $ 47 */ 48 49 #ifdef _KERNEL 50 51 #include <dev/sound/pcm/sound.h> 52 #include "feeder_if.h" 53 54 SND_DECLARE_FILE("$DragonFly: src/sys/dev/sound/pcm/feeder_rate.c,v 1.2 2003/06/17 04:28:31 dillon Exp $"); 55 56 #endif /* _KERNEL */ 57 58 MALLOC_DEFINE(M_RATEFEEDER, "ratefeed", "pcm rate feeder"); 59 60 #ifndef RATE_ASSERT 61 #define RATE_ASSERT(x, y) /* KASSERT(x) */ 62 #endif /* RATE_ASSERT */ 63 64 #ifndef RATE_TRACE 65 #define RATE_TRACE(x...) /* printf(x) */ 66 #endif 67 68 /*****************************************************************************/ 69 70 /* The following coefficients are coupled. They are chosen to be 71 * guarantee calculable factors for the interpolation routine. They 72 * have been tested over the range of RATEMIN-RATEMAX Hz. Decreasing 73 * the granularity increases the required buffer size and affects the 74 * gain values at different points in the space. These values were 75 * found by running the test program with -p (probe) and some trial 76 * and error. 77 * 78 * ROUNDHZ the granularity of sample rates (fits n*11025 and n*8000). 79 * FEEDBUFSZ the amount of buffer space. 80 * MINGAIN the minimum acceptable gain in coefficients search. 81 */ 82 #define ROUNDHZ 25 83 #define FEEDBUFSZ 8192 84 #define MINGAIN 92 85 86 #define RATEMIN 4000 87 #define RATEMAX 48000 88 89 struct feed_rate_info; 90 91 typedef int (*rate_convert_method)(struct feed_rate_info *, 92 uint32_t, uint32_t, int16_t *); 93 94 static int 95 convert_stereo_up(struct feed_rate_info *info, 96 uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst); 97 98 static int 99 convert_stereo_down(struct feed_rate_info *info, 100 uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst); 101 102 struct feed_rate_info { 103 uint32_t src, dst; /* source and destination rates */ 104 uint16_t buffer_ticks; /* number of available samples in buffer */ 105 uint16_t buffer_pos; /* next available sample in buffer */ 106 uint16_t rounds; /* maximum number of cycle rounds w buffer */ 107 uint16_t alpha; /* interpolation distance */ 108 uint16_t sscale; /* src clock scale */ 109 uint16_t dscale; /* dst clock scale */ 110 uint16_t mscale; /* scale factor to avoid divide per sample */ 111 uint16_t mroll; /* roll to again avoid divide per sample */ 112 uint16_t channels; /* 1 = mono, 2 = stereo */ 113 114 rate_convert_method convert; 115 int16_t buffer[FEEDBUFSZ]; 116 }; 117 118 #define bytes_per_sample 2 119 #define src_ticks_per_cycle(info) (info->dscale * info->rounds) 120 #define dst_ticks_per_cycle(info) (info->sscale * info->rounds) 121 #define bytes_per_tick(info) (info->channels * bytes_per_sample) 122 #define src_bytes_per_cycle(info) \ 123 (src_ticks_per_cycle(info) * bytes_per_tick(info)) 124 #define dst_bytes_per_cycle(info) \ 125 (dst_ticks_per_cycle(info) * bytes_per_tick(info)) 126 127 static uint32_t 128 gcd(uint32_t x, uint32_t y) 129 { 130 uint32_t w; 131 while (y != 0) { 132 w = x % y; 133 x = y; 134 y = w; 135 } 136 return x; 137 } 138 139 static int 140 feed_rate_setup(struct pcm_feeder *f) 141 { 142 struct feed_rate_info *info = f->data; 143 uint32_t mscale, mroll, l, r, g; 144 145 /* Beat sample rates down by greatest common divisor */ 146 g = gcd(info->src, info->dst); 147 info->sscale = info->dst / g; 148 info->dscale = info->src / g; 149 150 info->alpha = 0; 151 info->buffer_ticks = 0; 152 info->buffer_pos = 0; 153 154 /* Pick suitable conversion routine */ 155 if (info->src > info->dst) { 156 info->convert = convert_stereo_down; 157 } else { 158 info->convert = convert_stereo_up; 159 } 160 161 /* 162 * Determine number of conversion rounds that will fit into 163 * buffer. NB Must set info->rounds to one before using 164 * src_ticks_per_cycle here since it used by src_ticks_per_cycle. 165 */ 166 info->rounds = 1; 167 r = (FEEDBUFSZ - bytes_per_tick(info)) / 168 (src_ticks_per_cycle(info) * bytes_per_tick(info)); 169 if (r == 0) { 170 RATE_TRACE("Insufficient buffer space for conversion %d -> %d " 171 "(%d < %d)\n", info->src, info->dst, FEEDBUFSZ, 172 src_ticks_per_cycle(info) * bytes_per_tick(info)); 173 return -1; 174 } 175 info->rounds = r; 176 177 /* 178 * Find scale and roll combination that allows us to trade 179 * costly divide operations in the main loop for multiply-rolls. 180 */ 181 for (l = 96; l >= MINGAIN; l -= 3) { 182 for (mroll = 0; mroll < 16; mroll ++) { 183 mscale = (1 << mroll) / info->sscale; 184 185 r = (mscale * info->sscale * 100) >> mroll; 186 if (r > l && r <= 100) { 187 info->mscale = mscale; 188 info->mroll = mroll; 189 RATE_TRACE("Converting %d to %d with " 190 "mscale = %d and mroll = %d " 191 "(gain = %d / 100)\n", 192 info->src, info->dst, 193 info->mscale, info->mroll, r); 194 return 0; 195 } 196 } 197 } 198 199 RATE_TRACE("Failed to find a converter within %d%% gain for " 200 "%d to %d.\n", l, info->src, info->dst); 201 202 return -2; 203 } 204 205 static int 206 feed_rate_set(struct pcm_feeder *f, int what, int value) 207 { 208 struct feed_rate_info *info = f->data; 209 int rvalue; 210 211 if (value < RATEMIN || value > RATEMAX) { 212 return -1; 213 } 214 215 rvalue = (value / ROUNDHZ) * ROUNDHZ; 216 if (value - rvalue > ROUNDHZ / 2) { 217 rvalue += ROUNDHZ; 218 } 219 220 switch(what) { 221 case FEEDRATE_SRC: 222 info->src = rvalue; 223 break; 224 case FEEDRATE_DST: 225 info->dst = rvalue; 226 break; 227 default: 228 return -1; 229 } 230 231 return feed_rate_setup(f); 232 } 233 234 static int 235 feed_rate_get(struct pcm_feeder *f, int what) 236 { 237 struct feed_rate_info *info = f->data; 238 239 switch(what) { 240 case FEEDRATE_SRC: 241 return info->src; 242 case FEEDRATE_DST: 243 return info->dst; 244 default: 245 return -1; 246 } 247 return -1; 248 } 249 250 static int 251 feed_rate_init(struct pcm_feeder *f) 252 { 253 struct feed_rate_info *info; 254 255 info = malloc(sizeof(*info), M_RATEFEEDER, M_WAITOK | M_ZERO); 256 if (info == NULL) 257 return ENOMEM; 258 259 info->src = DSP_DEFAULT_SPEED; 260 info->dst = DSP_DEFAULT_SPEED; 261 info->channels = 2; 262 263 f->data = info; 264 return 0; 265 } 266 267 static int 268 feed_rate_free(struct pcm_feeder *f) 269 { 270 struct feed_rate_info *info = f->data; 271 272 if (info) { 273 free(info, M_RATEFEEDER); 274 } 275 f->data = NULL; 276 return 0; 277 } 278 279 static int 280 convert_stereo_up(struct feed_rate_info *info, 281 uint32_t src_ticks, 282 uint32_t dst_ticks, 283 int16_t *dst) 284 { 285 uint32_t max_dst_ticks; 286 int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o; 287 int16_t *src; 288 289 sp = info->buffer_pos * 2; 290 se = sp + src_ticks * 2; 291 292 src = info->buffer; 293 alpha = info->alpha * info->mscale; 294 dalpha = info->dscale * info->mscale; /* Alpha increment */ 295 malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */ 296 mroll = info->mroll; 297 298 /* 299 * For efficiency the main conversion loop should only depend on 300 * one variable. We use the state to work out the maximum number 301 * of output samples that are available and eliminate the checking of 302 * sp from the loop. 303 */ 304 max_dst_ticks = src_ticks * info->dst / info->src - alpha / dalpha; 305 if (max_dst_ticks < dst_ticks) { 306 dst_ticks = max_dst_ticks; 307 } 308 309 dp = 0; 310 de = dst_ticks * 2; 311 /* 312 * Unrolling this loop manually does not help much here because 313 * of the alpha, malpha comparison. 314 */ 315 while (dp < de) { 316 o = malpha - alpha; 317 x = alpha * src[sp + 2] + o * src[sp]; 318 dst[dp++] = x >> mroll; 319 x = alpha * src[sp + 3] + o * src[sp + 1]; 320 dst[dp++] = x >> mroll; 321 alpha += dalpha; 322 if (alpha >= malpha) { 323 alpha -= malpha; 324 sp += 2; 325 } 326 } 327 RATE_ASSERT(sp <= se, ("%s: Source overrun\n", __func__)); 328 329 info->buffer_pos = sp / info->channels; 330 info->alpha = alpha / info->mscale; 331 332 return dp / info->channels; 333 } 334 335 static int 336 convert_stereo_down(struct feed_rate_info *info, 337 uint32_t src_ticks, 338 uint32_t dst_ticks, 339 int16_t *dst) 340 { 341 int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o, m, 342 mdalpha, mstep; 343 int16_t *src; 344 345 sp = info->buffer_pos * 2; 346 se = sp + src_ticks * 2; 347 348 src = info->buffer; 349 alpha = info->alpha * info->mscale; 350 dalpha = info->dscale * info->mscale; /* Alpha increment */ 351 malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */ 352 mroll = info->mroll; 353 354 dp = 0; 355 de = dst_ticks * 2; 356 357 m = dalpha / malpha; 358 mstep = m * 2; 359 mdalpha = dalpha - m * malpha; 360 361 /* 362 * TODO: eliminate sp or dp from this loop comparison for a few 363 * extra % performance. 364 */ 365 while (sp < se && dp < de) { 366 o = malpha - alpha; 367 x = alpha * src[sp + 2] + o * src[sp]; 368 dst[dp++] = x >> mroll; 369 x = alpha * src[sp + 3] + o * src[sp + 1]; 370 dst[dp++] = x >> mroll; 371 372 alpha += mdalpha; 373 sp += mstep; 374 if (alpha >= malpha) { 375 alpha -= malpha; 376 sp += 2; 377 } 378 } 379 380 info->buffer_pos = sp / 2; 381 info->alpha = alpha / info->mscale; 382 383 RATE_ASSERT(info->buffer_pos <= info->buffer_ticks, 384 ("%s: Source overrun\n", __func__)); 385 386 return dp / 2; 387 } 388 389 static int 390 feed_rate(struct pcm_feeder *f, 391 struct pcm_channel *c, 392 uint8_t *b, 393 uint32_t count, 394 void *source) 395 { 396 struct feed_rate_info *info = f->data; 397 398 uint32_t done, s_ticks, d_ticks; 399 done = 0; 400 401 RATE_ASSERT(info->channels == 2, 402 ("%s: channels (%d) != 2", __func__, info->channels)); 403 404 while (done < count) { 405 /* Slurp in more data if input buffer is not full */ 406 while (info->buffer_ticks < src_ticks_per_cycle(info)) { 407 uint8_t *u8b; 408 int fetch; 409 fetch = src_bytes_per_cycle(info) - 410 info->buffer_ticks * bytes_per_tick(info); 411 u8b = (uint8_t*)info->buffer + 412 (info->buffer_ticks + 1) * 413 bytes_per_tick(info); 414 fetch = FEEDER_FEED(f->source, c, u8b, fetch, source); 415 RATE_ASSERT(fetch % bytes_per_tick(info) == 0, 416 ("%s: fetched unaligned bytes (%d)", 417 __func__, fetch)); 418 info->buffer_ticks += fetch / bytes_per_tick(info); 419 RATE_ASSERT(src_ticks_per_cycle(info) >= 420 info->buffer_ticks, 421 ("%s: buffer overfilled (%d > %d).", 422 __func__, info->buffer_ticks, 423 src_ticks_per_cycle(info))); 424 if (fetch == 0) 425 break; 426 } 427 428 /* Find amount of input buffer data that should be processed */ 429 d_ticks = (count - done) / bytes_per_tick(info); 430 s_ticks = info->buffer_ticks - info->buffer_pos; 431 if (info->buffer_ticks != src_ticks_per_cycle(info)) { 432 if (s_ticks > 8) 433 s_ticks -= 8; 434 else 435 s_ticks = 0; 436 } 437 438 d_ticks = info->convert(info, s_ticks, d_ticks, 439 (int16_t*)(b + done)); 440 if (d_ticks == 0) 441 break; 442 done += d_ticks * bytes_per_tick(info); 443 444 RATE_ASSERT(info->buffer_pos <= info->buffer_ticks, 445 ("%s: buffer_ticks too big\n", __func__)); 446 RATE_ASSERT(info->buffer_ticks <= src_ticks_per_cycle(info), 447 ("too many ticks %d / %d\n", 448 info->buffer_ticks, src_ticks_per_cycle(info))); 449 RATE_TRACE("%s: ticks %5d / %d pos %d\n", __func__, 450 info->buffer_ticks, src_ticks_per_cycle(info), 451 info->buffer_pos); 452 453 if (src_ticks_per_cycle(info) <= info->buffer_pos) { 454 /* End of cycle reached, copy last samples to start */ 455 uint8_t *u8b; 456 u8b = (uint8_t*)info->buffer; 457 bcopy(u8b + src_bytes_per_cycle(info), u8b, 458 bytes_per_tick(info)); 459 460 RATE_ASSERT(info->alpha == 0, 461 ("%s: completed cycle with " 462 "alpha non-zero", __func__, info->alpha)); 463 464 info->buffer_pos = 0; 465 info->buffer_ticks = 0; 466 } 467 } 468 469 RATE_ASSERT(count >= done, 470 ("%s: generated too many bytes of data (%d > %d).", 471 __func__, done, count)); 472 473 if (done != count) { 474 RATE_TRACE("Only did %d of %d\n", done, count); 475 } 476 477 return done; 478 } 479 480 static struct pcm_feederdesc feeder_rate_desc[] = { 481 {FEEDER_RATE, AFMT_S16_LE | AFMT_STEREO, AFMT_S16_LE | AFMT_STEREO, 0}, 482 {0, 0, 0, 0}, 483 }; 484 static kobj_method_t feeder_rate_methods[] = { 485 KOBJMETHOD(feeder_init, feed_rate_init), 486 KOBJMETHOD(feeder_free, feed_rate_free), 487 KOBJMETHOD(feeder_set, feed_rate_set), 488 KOBJMETHOD(feeder_get, feed_rate_get), 489 KOBJMETHOD(feeder_feed, feed_rate), 490 {0, 0} 491 }; 492 FEEDER_DECLARE(feeder_rate, 2, NULL); 493 494