1 /******************************************************************** 2 * * 3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. * 4 * * 5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * 6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * 7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * 8 * * 9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 * 10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ * 11 * * 12 ******************************************************************** 13 14 function: normalized modified discrete cosine transform 15 power of two length transform only [64 <= n ] 16 last mod: $Id: mdct.c,v 1.9 2002/10/16 09:17:39 xiphmont Exp $ 17 18 Original algorithm adapted long ago from _The use of multirate filter 19 banks for coding of high quality digital audio_, by T. Sporer, 20 K. Brandenburg and B. Edler, collection of the European Signal 21 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp 22 211-214 23 24 The below code implements an algorithm that no longer looks much like 25 that presented in the paper, but the basic structure remains if you 26 dig deep enough to see it. 27 28 This module DOES NOT INCLUDE code to generate/apply the window 29 function. Everybody has their own weird favorite including me... I 30 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may 31 vehemently disagree. 32 33 ********************************************************************/ 34 35 #include "ivorbiscodec.h" 36 #include "codebook.h" 37 #include "misc.h" 38 #include "mdct.h" 39 #include "mdct_lookup.h" 40 41 42 /* 8 point butterfly (in place) */ 43 STIN void mdct_butterfly_8(DATA_TYPE *x){ 44 45 REG_TYPE r0 = x[4] + x[0]; 46 REG_TYPE r1 = x[4] - x[0]; 47 REG_TYPE r2 = x[5] + x[1]; 48 REG_TYPE r3 = x[5] - x[1]; 49 REG_TYPE r4 = x[6] + x[2]; 50 REG_TYPE r5 = x[6] - x[2]; 51 REG_TYPE r6 = x[7] + x[3]; 52 REG_TYPE r7 = x[7] - x[3]; 53 54 x[0] = r5 + r3; 55 x[1] = r7 - r1; 56 x[2] = r5 - r3; 57 x[3] = r7 + r1; 58 x[4] = r4 - r0; 59 x[5] = r6 - r2; 60 x[6] = r4 + r0; 61 x[7] = r6 + r2; 62 MB(); 63 } 64 65 /* 16 point butterfly (in place, 4 register) */ 66 STIN void mdct_butterfly_16(DATA_TYPE *x){ 67 68 REG_TYPE r0, r1; 69 70 r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0]; 71 r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1]; 72 x[ 0] = MULT31((r0 + r1) , cPI2_8); 73 x[ 1] = MULT31((r1 - r0) , cPI2_8); 74 MB(); 75 76 r0 = x[10] - x[ 2]; x[10] += x[ 2]; 77 r1 = x[ 3] - x[11]; x[11] += x[ 3]; 78 x[ 2] = r1; x[ 3] = r0; 79 MB(); 80 81 r0 = x[12] - x[ 4]; x[12] += x[ 4]; 82 r1 = x[13] - x[ 5]; x[13] += x[ 5]; 83 x[ 4] = MULT31((r0 - r1) , cPI2_8); 84 x[ 5] = MULT31((r0 + r1) , cPI2_8); 85 MB(); 86 87 r0 = x[14] - x[ 6]; x[14] += x[ 6]; 88 r1 = x[15] - x[ 7]; x[15] += x[ 7]; 89 x[ 6] = r0; x[ 7] = r1; 90 MB(); 91 92 mdct_butterfly_8(x); 93 mdct_butterfly_8(x+8); 94 } 95 96 /* 32 point butterfly (in place, 4 register) */ 97 STIN void mdct_butterfly_32(DATA_TYPE *x){ 98 99 REG_TYPE r0, r1; 100 101 r0 = x[30] - x[14]; x[30] += x[14]; 102 r1 = x[31] - x[15]; x[31] += x[15]; 103 x[14] = r0; x[15] = r1; 104 MB(); 105 106 r0 = x[28] - x[12]; x[28] += x[12]; 107 r1 = x[29] - x[13]; x[29] += x[13]; 108 XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] ); 109 MB(); 110 111 r0 = x[26] - x[10]; x[26] += x[10]; 112 r1 = x[27] - x[11]; x[27] += x[11]; 113 x[10] = MULT31((r0 - r1) , cPI2_8); 114 x[11] = MULT31((r0 + r1) , cPI2_8); 115 MB(); 116 117 r0 = x[24] - x[ 8]; x[24] += x[ 8]; 118 r1 = x[25] - x[ 9]; x[25] += x[ 9]; 119 XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] ); 120 MB(); 121 122 r0 = x[22] - x[ 6]; x[22] += x[ 6]; 123 r1 = x[ 7] - x[23]; x[23] += x[ 7]; 124 x[ 6] = r1; x[ 7] = r0; 125 MB(); 126 127 r0 = x[ 4] - x[20]; x[20] += x[ 4]; 128 r1 = x[ 5] - x[21]; x[21] += x[ 5]; 129 XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] ); 130 MB(); 131 132 r0 = x[ 2] - x[18]; x[18] += x[ 2]; 133 r1 = x[ 3] - x[19]; x[19] += x[ 3]; 134 x[ 2] = MULT31((r1 + r0) , cPI2_8); 135 x[ 3] = MULT31((r1 - r0) , cPI2_8); 136 MB(); 137 138 r0 = x[ 0] - x[16]; x[16] += x[ 0]; 139 r1 = x[ 1] - x[17]; x[17] += x[ 1]; 140 XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] ); 141 MB(); 142 143 mdct_butterfly_16(x); 144 mdct_butterfly_16(x+16); 145 } 146 147 /* N/stage point generic N stage butterfly (in place, 2 register) */ 148 STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){ 149 150 LOOKUP_T *T = sincos_lookup0; 151 DATA_TYPE *x1 = x + points - 8; 152 DATA_TYPE *x2 = x + (points>>1) - 8; 153 REG_TYPE r0; 154 REG_TYPE r1; 155 156 do{ 157 r0 = x1[6] - x2[6]; x1[6] += x2[6]; 158 r1 = x2[7] - x1[7]; x1[7] += x2[7]; 159 XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step; 160 161 r0 = x1[4] - x2[4]; x1[4] += x2[4]; 162 r1 = x2[5] - x1[5]; x1[5] += x2[5]; 163 XPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T+=step; 164 165 r0 = x1[2] - x2[2]; x1[2] += x2[2]; 166 r1 = x2[3] - x1[3]; x1[3] += x2[3]; 167 XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step; 168 169 r0 = x1[0] - x2[0]; x1[0] += x2[0]; 170 r1 = x2[1] - x1[1]; x1[1] += x2[1]; 171 XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T+=step; 172 173 x1-=8; x2-=8; 174 }while(T<sincos_lookup0+1024); 175 do{ 176 r0 = x1[6] - x2[6]; x1[6] += x2[6]; 177 r1 = x1[7] - x2[7]; x1[7] += x2[7]; 178 XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step; 179 180 r0 = x1[4] - x2[4]; x1[4] += x2[4]; 181 r1 = x1[5] - x2[5]; x1[5] += x2[5]; 182 XNPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T-=step; 183 184 r0 = x1[2] - x2[2]; x1[2] += x2[2]; 185 r1 = x1[3] - x2[3]; x1[3] += x2[3]; 186 XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step; 187 188 r0 = x1[0] - x2[0]; x1[0] += x2[0]; 189 r1 = x1[1] - x2[1]; x1[1] += x2[1]; 190 XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T-=step; 191 192 x1-=8; x2-=8; 193 }while(T>sincos_lookup0); 194 do{ 195 r0 = x2[6] - x1[6]; x1[6] += x2[6]; 196 r1 = x2[7] - x1[7]; x1[7] += x2[7]; 197 XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step; 198 199 r0 = x2[4] - x1[4]; x1[4] += x2[4]; 200 r1 = x2[5] - x1[5]; x1[5] += x2[5]; 201 XPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T+=step; 202 203 r0 = x2[2] - x1[2]; x1[2] += x2[2]; 204 r1 = x2[3] - x1[3]; x1[3] += x2[3]; 205 XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step; 206 207 r0 = x2[0] - x1[0]; x1[0] += x2[0]; 208 r1 = x2[1] - x1[1]; x1[1] += x2[1]; 209 XPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T+=step; 210 211 x1-=8; x2-=8; 212 }while(T<sincos_lookup0+1024); 213 do{ 214 r0 = x1[6] - x2[6]; x1[6] += x2[6]; 215 r1 = x2[7] - x1[7]; x1[7] += x2[7]; 216 XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step; 217 218 r0 = x1[4] - x2[4]; x1[4] += x2[4]; 219 r1 = x2[5] - x1[5]; x1[5] += x2[5]; 220 XNPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T-=step; 221 222 r0 = x1[2] - x2[2]; x1[2] += x2[2]; 223 r1 = x2[3] - x1[3]; x1[3] += x2[3]; 224 XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step; 225 226 r0 = x1[0] - x2[0]; x1[0] += x2[0]; 227 r1 = x2[1] - x1[1]; x1[1] += x2[1]; 228 XNPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T-=step; 229 230 x1-=8; x2-=8; 231 }while(T>sincos_lookup0); 232 } 233 234 STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){ 235 236 int stages=8-shift; 237 int i,j; 238 239 for(i=0;--stages>0;i++){ 240 for(j=0;j<(1<<i);j++) 241 mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift)); 242 } 243 244 for(j=0;j<points;j+=32) 245 mdct_butterfly_32(x+j); 246 247 } 248 249 static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15}; 250 251 STIN int bitrev12(int x){ 252 return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8); 253 } 254 255 STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){ 256 257 int bit = 0; 258 DATA_TYPE *w0 = x; 259 DATA_TYPE *w1 = x = w0+(n>>1); 260 LOOKUP_T *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1; 261 LOOKUP_T *Ttop = T+1024; 262 DATA_TYPE r2; 263 264 do{ 265 DATA_TYPE r3 = bitrev12(bit++); 266 DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1; 267 DATA_TYPE *x1 = x + (r3>>shift); 268 269 REG_TYPE r0 = x0[0] + x1[0]; 270 REG_TYPE r1 = x1[1] - x0[1]; 271 272 XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step; 273 274 w1 -= 4; 275 276 r0 = (x0[1] + x1[1])>>1; 277 r1 = (x0[0] - x1[0])>>1; 278 w0[0] = r0 + r2; 279 w0[1] = r1 + r3; 280 w1[2] = r0 - r2; 281 w1[3] = r3 - r1; 282 283 r3 = bitrev12(bit++); 284 x0 = x + ((r3 ^ 0xfff)>>shift) -1; 285 x1 = x + (r3>>shift); 286 287 r0 = x0[0] + x1[0]; 288 r1 = x1[1] - x0[1]; 289 290 XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step; 291 292 r0 = (x0[1] + x1[1])>>1; 293 r1 = (x0[0] - x1[0])>>1; 294 w0[2] = r0 + r2; 295 w0[3] = r1 + r3; 296 w1[0] = r0 - r2; 297 w1[1] = r3 - r1; 298 299 w0 += 4; 300 }while(T<Ttop); 301 do{ 302 DATA_TYPE r3 = bitrev12(bit++); 303 DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1; 304 DATA_TYPE *x1 = x + (r3>>shift); 305 306 REG_TYPE r0 = x0[0] + x1[0]; 307 REG_TYPE r1 = x1[1] - x0[1]; 308 309 T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 ); 310 311 w1 -= 4; 312 313 r0 = (x0[1] + x1[1])>>1; 314 r1 = (x0[0] - x1[0])>>1; 315 w0[0] = r0 + r2; 316 w0[1] = r1 + r3; 317 w1[2] = r0 - r2; 318 w1[3] = r3 - r1; 319 320 r3 = bitrev12(bit++); 321 x0 = x + ((r3 ^ 0xfff)>>shift) -1; 322 x1 = x + (r3>>shift); 323 324 r0 = x0[0] + x1[0]; 325 r1 = x1[1] - x0[1]; 326 327 T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 ); 328 329 r0 = (x0[1] + x1[1])>>1; 330 r1 = (x0[0] - x1[0])>>1; 331 w0[2] = r0 + r2; 332 w0[3] = r1 + r3; 333 w1[0] = r0 - r2; 334 w1[1] = r3 - r1; 335 336 w0 += 4; 337 }while(w0<w1); 338 } 339 340 void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){ 341 int n2=n>>1; 342 int n4=n>>2; 343 DATA_TYPE *iX; 344 DATA_TYPE *oX; 345 LOOKUP_T *T; 346 LOOKUP_T *V; 347 int shift; 348 int step; 349 350 for (shift=6;!(n&(1<<shift));shift++); 351 shift=13-shift; 352 step=2<<shift; 353 354 /* rotate */ 355 356 iX = in+n2-7; 357 oX = out+n2+n4; 358 T = sincos_lookup0; 359 360 do{ 361 oX-=4; 362 XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step; 363 XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step; 364 iX-=8; 365 }while(iX>=in+n4); 366 do{ 367 oX-=4; 368 XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step; 369 XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step; 370 iX-=8; 371 }while(iX>=in); 372 373 iX = in+n2-8; 374 oX = out+n2+n4; 375 T = sincos_lookup0; 376 377 do{ 378 T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] ); 379 T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] ); 380 iX-=8; 381 oX+=4; 382 }while(iX>=in+n4); 383 do{ 384 T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] ); 385 T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] ); 386 iX-=8; 387 oX+=4; 388 }while(iX>=in); 389 390 mdct_butterflies(out+n2,n2,shift); 391 mdct_bitreverse(out,n,step,shift); 392 393 /* rotate + window */ 394 395 step>>=2; 396 { 397 DATA_TYPE *oX1=out+n2+n4; 398 DATA_TYPE *oX2=out+n2+n4; 399 DATA_TYPE *iX =out; 400 401 switch(step) { 402 default: { 403 T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1; 404 do{ 405 oX1-=4; 406 XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step; 407 XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step; 408 XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step; 409 XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step; 410 oX2+=4; 411 iX+=8; 412 }while(iX<oX1); 413 break; 414 } 415 416 case 1: { 417 /* linear interpolation between table values: offset=0.5, step=1 */ 418 REG_TYPE t0,t1,v0,v1; 419 T = sincos_lookup0; 420 V = sincos_lookup1; 421 t0 = (*T++)>>1; 422 t1 = (*T++)>>1; 423 do{ 424 oX1-=4; 425 426 t0 += (v0 = (*V++)>>1); 427 t1 += (v1 = (*V++)>>1); 428 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] ); 429 v0 += (t0 = (*T++)>>1); 430 v1 += (t1 = (*T++)>>1); 431 XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] ); 432 t0 += (v0 = (*V++)>>1); 433 t1 += (v1 = (*V++)>>1); 434 XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] ); 435 v0 += (t0 = (*T++)>>1); 436 v1 += (t1 = (*T++)>>1); 437 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] ); 438 439 oX2+=4; 440 iX+=8; 441 }while(iX<oX1); 442 break; 443 } 444 445 case 0: { 446 /* linear interpolation between table values: offset=0.25, step=0.5 */ 447 REG_TYPE t0,t1,v0,v1,q0,q1; 448 T = sincos_lookup0; 449 V = sincos_lookup1; 450 t0 = *T++; 451 t1 = *T++; 452 do{ 453 oX1-=4; 454 455 v0 = *V++; 456 v1 = *V++; 457 t0 += (q0 = (v0-t0)>>2); 458 t1 += (q1 = (v1-t1)>>2); 459 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] ); 460 t0 = v0-q0; 461 t1 = v1-q1; 462 XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] ); 463 464 t0 = *T++; 465 t1 = *T++; 466 v0 += (q0 = (t0-v0)>>2); 467 v1 += (q1 = (t1-v1)>>2); 468 XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] ); 469 v0 = t0-q0; 470 v1 = t1-q1; 471 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] ); 472 473 oX2+=4; 474 iX+=8; 475 }while(iX<oX1); 476 break; 477 } 478 } 479 480 iX=out+n2+n4; 481 oX1=out+n4; 482 oX2=oX1; 483 484 do{ 485 oX1-=4; 486 iX-=4; 487 488 oX2[0] = -(oX1[3] = iX[3]); 489 oX2[1] = -(oX1[2] = iX[2]); 490 oX2[2] = -(oX1[1] = iX[1]); 491 oX2[3] = -(oX1[0] = iX[0]); 492 493 oX2+=4; 494 }while(oX2<iX); 495 496 iX=out+n2+n4; 497 oX1=out+n2+n4; 498 oX2=out+n2; 499 500 do{ 501 oX1-=4; 502 oX1[0]= iX[3]; 503 oX1[1]= iX[2]; 504 oX1[2]= iX[1]; 505 oX1[3]= iX[0]; 506 iX+=4; 507 }while(oX1>oX2); 508 } 509 } 510 511