1 /* 2 dct64.c: DCT64, the plain C version 3 4 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 5 see COPYING and AUTHORS files in distribution or http://mpg123.org 6 initially written by Michael Hipp 7 */ 8 9 /* 10 * Discrete Cosine Tansform (DCT) for subband synthesis 11 * 12 * -funroll-loops (for gcc) will remove the loops for better performance 13 * using loops in the source-code enhances readabillity 14 * 15 * 16 * TODO: write an optimized version for the down-sampling modes 17 * (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero 18 */ 19 20 #include "mpg123lib_intern.h" 21 22 void dct64(real *out0,real *out1,real *samples) 23 { 24 real bufs[64]; 25 26 { 27 register int i,j; 28 register real *b1,*b2,*bs,*costab; 29 30 b1 = samples; 31 bs = bufs; 32 costab = pnts[0]+16; 33 b2 = b1 + 32; 34 35 for(i=15;i>=0;i--) 36 *bs++ = (*b1++ + *--b2); 37 for(i=15;i>=0;i--) 38 *bs++ = REAL_MUL((*--b2 - *b1++), *--costab); 39 40 b1 = bufs; 41 costab = pnts[1]+8; 42 b2 = b1 + 16; 43 44 { 45 for(i=7;i>=0;i--) 46 *bs++ = (*b1++ + *--b2); 47 for(i=7;i>=0;i--) 48 *bs++ = REAL_MUL((*--b2 - *b1++), *--costab); 49 b2 += 32; 50 costab += 8; 51 for(i=7;i>=0;i--) 52 *bs++ = (*b1++ + *--b2); 53 for(i=7;i>=0;i--) 54 *bs++ = REAL_MUL((*b1++ - *--b2), *--costab); 55 b2 += 32; 56 } 57 58 bs = bufs; 59 costab = pnts[2]; 60 b2 = b1 + 8; 61 62 for(j=2;j;j--) 63 { 64 for(i=3;i>=0;i--) 65 *bs++ = (*b1++ + *--b2); 66 for(i=3;i>=0;i--) 67 *bs++ = REAL_MUL((*--b2 - *b1++), costab[i]); 68 b2 += 16; 69 for(i=3;i>=0;i--) 70 *bs++ = (*b1++ + *--b2); 71 for(i=3;i>=0;i--) 72 *bs++ = REAL_MUL((*b1++ - *--b2), costab[i]); 73 b2 += 16; 74 } 75 76 b1 = bufs; 77 costab = pnts[3]; 78 b2 = b1 + 4; 79 80 for(j=4;j;j--) 81 { 82 *bs++ = (*b1++ + *--b2); 83 *bs++ = (*b1++ + *--b2); 84 *bs++ = REAL_MUL((*--b2 - *b1++), costab[1]); 85 *bs++ = REAL_MUL((*--b2 - *b1++), costab[0]); 86 b2 += 8; 87 *bs++ = (*b1++ + *--b2); 88 *bs++ = (*b1++ + *--b2); 89 *bs++ = REAL_MUL((*b1++ - *--b2), costab[1]); 90 *bs++ = REAL_MUL((*b1++ - *--b2), costab[0]); 91 b2 += 8; 92 } 93 bs = bufs; 94 costab = pnts[4]; 95 96 for(j=8;j;j--) 97 { 98 real v0,v1; 99 v0=*b1++; v1 = *b1++; 100 *bs++ = (v0 + v1); 101 *bs++ = REAL_MUL((v0 - v1), (*costab)); 102 v0=*b1++; v1 = *b1++; 103 *bs++ = (v0 + v1); 104 *bs++ = REAL_MUL((v1 - v0), (*costab)); 105 } 106 107 } 108 109 110 { 111 register real *b1; 112 register int i; 113 114 for(b1=bufs,i=8;i;i--,b1+=4) 115 b1[2] += b1[3]; 116 117 for(b1=bufs,i=4;i;i--,b1+=8) 118 { 119 b1[4] += b1[6]; 120 b1[6] += b1[5]; 121 b1[5] += b1[7]; 122 } 123 124 for(b1=bufs,i=2;i;i--,b1+=16) 125 { 126 b1[8] += b1[12]; 127 b1[12] += b1[10]; 128 b1[10] += b1[14]; 129 b1[14] += b1[9]; 130 b1[9] += b1[13]; 131 b1[13] += b1[11]; 132 b1[11] += b1[15]; 133 } 134 } 135 136 137 out0[0x10*16] = REAL_SCALE_DCT64(bufs[0]); 138 out0[0x10*15] = REAL_SCALE_DCT64(bufs[16+0] + bufs[16+8]); 139 out0[0x10*14] = REAL_SCALE_DCT64(bufs[8]); 140 out0[0x10*13] = REAL_SCALE_DCT64(bufs[16+8] + bufs[16+4]); 141 out0[0x10*12] = REAL_SCALE_DCT64(bufs[4]); 142 out0[0x10*11] = REAL_SCALE_DCT64(bufs[16+4] + bufs[16+12]); 143 out0[0x10*10] = REAL_SCALE_DCT64(bufs[12]); 144 out0[0x10* 9] = REAL_SCALE_DCT64(bufs[16+12] + bufs[16+2]); 145 out0[0x10* 8] = REAL_SCALE_DCT64(bufs[2]); 146 out0[0x10* 7] = REAL_SCALE_DCT64(bufs[16+2] + bufs[16+10]); 147 out0[0x10* 6] = REAL_SCALE_DCT64(bufs[10]); 148 out0[0x10* 5] = REAL_SCALE_DCT64(bufs[16+10] + bufs[16+6]); 149 out0[0x10* 4] = REAL_SCALE_DCT64(bufs[6]); 150 out0[0x10* 3] = REAL_SCALE_DCT64(bufs[16+6] + bufs[16+14]); 151 out0[0x10* 2] = REAL_SCALE_DCT64(bufs[14]); 152 out0[0x10* 1] = REAL_SCALE_DCT64(bufs[16+14] + bufs[16+1]); 153 out0[0x10* 0] = REAL_SCALE_DCT64(bufs[1]); 154 155 out1[0x10* 0] = REAL_SCALE_DCT64(bufs[1]); 156 out1[0x10* 1] = REAL_SCALE_DCT64(bufs[16+1] + bufs[16+9]); 157 out1[0x10* 2] = REAL_SCALE_DCT64(bufs[9]); 158 out1[0x10* 3] = REAL_SCALE_DCT64(bufs[16+9] + bufs[16+5]); 159 out1[0x10* 4] = REAL_SCALE_DCT64(bufs[5]); 160 out1[0x10* 5] = REAL_SCALE_DCT64(bufs[16+5] + bufs[16+13]); 161 out1[0x10* 6] = REAL_SCALE_DCT64(bufs[13]); 162 out1[0x10* 7] = REAL_SCALE_DCT64(bufs[16+13] + bufs[16+3]); 163 out1[0x10* 8] = REAL_SCALE_DCT64(bufs[3]); 164 out1[0x10* 9] = REAL_SCALE_DCT64(bufs[16+3] + bufs[16+11]); 165 out1[0x10*10] = REAL_SCALE_DCT64(bufs[11]); 166 out1[0x10*11] = REAL_SCALE_DCT64(bufs[16+11] + bufs[16+7]); 167 out1[0x10*12] = REAL_SCALE_DCT64(bufs[7]); 168 out1[0x10*13] = REAL_SCALE_DCT64(bufs[16+7] + bufs[16+15]); 169 out1[0x10*14] = REAL_SCALE_DCT64(bufs[15]); 170 out1[0x10*15] = REAL_SCALE_DCT64(bufs[16+15]); 171 172 } 173 174 175