1 /* K=9 r=1/2 Viterbi decoder with Intel SIMD
2 * May 2001, Phil Karn, KA9Q
3 */
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <memory.h>
7 #include "viterbi29.h"
8 #include "parity.h"
9
10 static int V29_init;
11 int cpu_features(void);
12
13 #if defined(SSE2)
14 char id_viterbi29[] = "k=9 r=1/2 Viterbi decoder, SSE2 version";
15 #elif defined(SSE)
16 char id_viterbi29[] = "k=9 r=1/2 Viterbi decoder, SSE version";
17 #elif defined(MMX)
18 char id_viterbi29[] = "k=9 r=1/2 Viterbi decoder, MMX version";
19 #else
20 char id_viterbi29[] = "k=9 r=1/2 Viterbi decoder, portable C version";
21 #endif
22
23 #if defined(MMX)
24
25 typedef union { long long p; char c[256]; } decision_t;
26 #define EXTRACT_DECISION(d,state) ((d)->c[state] & 1)
27
28 /* Combined tables used by mmxbfly */
29 unsigned char Mettab29_1[16][128] __attribute__ ((aligned(32)));
30 unsigned char Mettab29_2[16][128] __attribute__ ((aligned(32)));
31
32 #else
33
34 typedef union { long long p; unsigned long w[8]; } decision_t;
35 #define EXTRACT_DECISION(d,state) (((d)->w[state/32] >> (state%32)) & 1)
36
37 /* Symbol branch table used by ssebfly and sse2bfly */
38 unsigned char Branchtab29_1[128] __attribute__ ((aligned(32)));
39 unsigned char Branchtab29_2[128] __attribute__ ((aligned(32)));
40
41 #endif
42
43 /* State info for instance of Viterbi decoder
44 * Don't change this without also changing references in (mmx|sse|sse2)bfly29.s!
45 */
46 struct v29 {
47 unsigned char metrics1[256]; /* path metric buffer 1 */
48 unsigned char metrics2[256]; /* path metric buffer 2 */
49 decision_t *dp; /* Pointer to decision output for current bit */
50 unsigned char *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
51 decision_t *decisions; /* Beginning of decisions for block */
52 void *alloc_blk; /* Return value from malloc */
53 };
54
55
56 /* Create a new instance of a Viterbi decoder */
create_viterbi29(int len)57 void *create_viterbi29(int len){
58 void *blk;
59 struct v29 *vp;
60 int state;
61
62 if(!V29_init){
63 #if defined(SSE2)
64 if(!(cpu_features() & (1 << 26))){
65 fprintf(stderr,"viterbi29: CPU does not support SSE2 instructions\n");
66 exit(1);
67 }
68 #elif defined(SSE)
69 if(!(cpu_features() & (1 << 25))){
70 fprintf(stderr,"viterbi29: CPU does not support SSE instructions\n");
71 exit(1);
72 }
73 #elif defined(MMX)
74 if(!(cpu_features() & (1 << 23))){
75 fprintf(stderr,"viterbi29: CPU does not support MMX instructions\n");
76 exit(1);
77 }
78 #endif
79 /* Initialize metric tables */
80 for(state=0;state < 128;state++){
81 #if defined(MMX)
82 int symbol;
83 for(symbol = 0;symbol < 16;symbol++){
84 Mettab29_1[symbol][state] = parity((2*state) & V29POLYA) ? (15-symbol):symbol;
85 Mettab29_2[symbol][state] = parity((2*state) & V29POLYB) ? (15-symbol):symbol;
86 }
87 #else
88 Branchtab29_1[state] = parity((2*state) & V29POLYA) ? 15:0;
89 Branchtab29_2[state] = parity((2*state) & V29POLYB) ? 15:0;
90 #endif
91 }
92 V29_init = 1;
93 }
94 /* Malloc only guarantees 8-byte alignment, but we want to ensure that
95 * the path metric arrays are on 32-byte boundaries. At least 16-byte
96 * alignment is mandatory in the SSE2 version, but the Pentium III
97 * cache line size is 32 bytes
98 */
99 blk = malloc(sizeof(struct v29)+32);
100 if((int)blk & 31){
101 /* Not on 32-byte boundary; shift up */
102 vp = (struct v29 *)(((int)blk + 32) & ~31);
103 } else {
104 vp = (struct v29 *)blk;
105 }
106 vp->alloc_blk = blk; /* Record original pointer from malloc for use by free() */
107
108 /* The decisions only need be 32-bit aligned */
109 #if defined(MMX)
110 vp->dp = vp->decisions = malloc((len+8)*256);
111 #else
112 vp->dp = vp->decisions = malloc((len+8)*32);
113 #endif
114
115 vp->old_metrics = vp->metrics1;
116 vp->new_metrics = vp->metrics2;
117 return vp;
118 }
119
120 /* Initialize Viterbi decoder for start of new frame */
init_viterbi29(void * p,int starting_state)121 int init_viterbi29(void *p,int starting_state){
122 struct v29 *vp = p;
123
124 memset(vp->metrics1,60,256);
125 vp->old_metrics = vp->metrics1;
126 vp->new_metrics = vp->metrics2;
127 vp->dp = vp->decisions;
128 vp->old_metrics[starting_state & 255] = 0; /* Bias known start state */
129 return 0;
130 }
131
132 /* Do Viterbi chainback */
chainback_viterbi29(void * p,unsigned char * data,unsigned int nbits,unsigned int endstate)133 int chainback_viterbi29(
134 void *p,
135 unsigned char *data, /* Decoded output data */
136 unsigned int nbits, /* Number of data bits */
137 unsigned int endstate){ /* Terminal encoder state */
138
139 struct v29 *vp = p;
140 int k;
141 decision_t *decisions = vp->decisions;
142
143 /* Make room beyond the end of the encoder register so we can
144 * accumulate a full byte of decoded data
145 */
146 endstate %= 256;
147 decisions += 8; /* Look past tail */
148
149 while(nbits-- != 0){
150 k = EXTRACT_DECISION(&decisions[nbits],endstate);
151 /* The store into data[] only needs to be done every 8 bits.
152 * But this avoids a conditional branch, and the writes will
153 * combine in the cache anyway
154 */
155 data[nbits>>3] = endstate = (endstate >> 1) | (k << 7);
156 }
157 return 0;
158 }
159
160 /* Delete instance of a Viterbi decoder */
delete_viterbi29(void * p)161 void delete_viterbi29(void *p){
162 struct v29 *vp = p;
163
164 if(vp != NULL){
165 free(vp->decisions);
166 free(vp->alloc_blk);
167 }
168 }
169
170 #if !defined(MMX) && !defined(SSE) & !defined(SSE2)
171
172 /* C-language butterfly */
173 #define BFLY(i) {\
174 unsigned char metric,m0,m1,decision;\
175 metric = ((Branchtab29_1[i] ^ sym1) + (Branchtab29_2[i] ^ sym2) + 1)/2;\
176 m0 = vp->old_metrics[i] + metric;\
177 m1 = vp->old_metrics[i+128] + (15 - metric);\
178 decision = (m0-m1) >= 0;\
179 vp->new_metrics[2*i] = decision ? m1 : m0;\
180 vp->dp->w[i/16] |= decision << ((2*i)&31);\
181 m0 -= (metric+metric-15);\
182 m1 += (metric+metric-15);\
183 decision = (m0-m1) >= 0;\
184 vp->new_metrics[2*i+1] = decision ? m1 : m0;\
185 vp->dp->w[i/16] |= decision << ((2*i+1)&31);\
186 }
187
update_viterbi29(void * p,unsigned char sym1,unsigned char sym2)188 int update_viterbi29(void *p,unsigned char sym1,unsigned char sym2){
189 int i;
190 struct v29 *vp = p;
191 unsigned char *tmp;
192 int normalize = 0;
193
194 for(i=0;i<8;i++)
195 vp->dp->w[i] = 0;
196
197 for(i=0;i<128;i++)
198 BFLY(i);
199
200 /* Renormalize metrics */
201 if(vp->new_metrics[0] > 150){
202 int i;
203 unsigned char minmetric = 255;
204
205 for(i=0;i<64;i++)
206 if(vp->new_metrics[i] < minmetric)
207 minmetric = vp->new_metrics[i];
208 for(i=0;i<64;i++)
209 vp->new_metrics[i] -= minmetric;
210 normalize = minmetric;
211 }
212
213 vp->dp++;
214 tmp = vp->old_metrics;
215 vp->old_metrics = vp->new_metrics;
216 vp->new_metrics = tmp;
217
218 return normalize;
219 }
220 #endif
221
emms_viterbi29(void)222 void emms_viterbi29(void){
223 #if defined(MMX) || defined(SSE)
224 asm("emms");
225 #endif
226 }
227
228