1 /* K=7 r=1/2 Viterbi decoder with optional Intel SIMD
2 * Dec 2001, Phil Karn, KA9Q
3 */
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <memory.h>
7 #include "viterbi27.h"
8 #include "parity.h"
9
10 static int V27_init;
11 int cpu_features(void);
12
13 #if defined(SSE2)
14 char id_viterbi27[] = "k=7 r=1/2 Viterbi decoder, SSE2 version";
15 #elif defined(SSE)
16 char id_viterbi27[] = "k=7 r=1/2 Viterbi decoder, SSE version";
17 #elif defined(MMX)
18 char id_viterbi27[] = "k=7 r=1/2 Viterbi decoder, MMX version";
19 #else
20 char id_viterbi27[] = "k=7 r=1/2 Viterbi decoder, portable C version";
21 #endif
22
23 #if defined(MMX)
24 typedef union { long long p; char c[64]; } decision_t;
25 #define EXTRACT_DECISION(d,state) ((d)->c[(state)] & 1)
26
27 /*
28 Branch metric lookup tables, indexed by state and input symbol
29 Mettab27_1 is for the first symbol, Mettab27_2 for the second
30 The first index is the input symbol (0-15)
31 The second index is the state number
32 */
33 unsigned char Mettab27_1[16][32] __attribute__((aligned(32)));
34 unsigned char Mettab27_2[16][32] __attribute__((aligned(32)));
35
36 #else
37 typedef union { long long p; unsigned long w[2]; } decision_t;
38 #define EXTRACT_DECISION(d,state) (((d)->w[(state)/32] >> ((state)%32)) & 1)
39
40 unsigned char Branchtab27_1[32] __attribute__ ((aligned(32)));
41 unsigned char Branchtab27_2[32] __attribute__ ((aligned(32)));
42 #endif
43
44 /* State info for instance of Viterbi decoder
45 * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s!
46 */
47 struct v27 {
48 unsigned char metrics1[64]; /* path metric buffer 1 */
49 unsigned char metrics2[64]; /* path metric buffer 2 */
50 decision_t *dp; /* Pointer to decision output for current bit */
51 unsigned char *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
52 decision_t *decisions; /* Beginning of decisions for block */
53 void *alloc_blk; /* Return value from malloc */
54 };
55
56 /* Create a new instance of a Viterbi decoder */
create_viterbi27(int len)57 void *create_viterbi27(int len){
58 void *blk;
59 struct v27 *vp;
60 int state;
61
62 if(!V27_init){
63 /* Check that CPU has required features */
64 #if defined(SSE2)
65 if(!(cpu_features() & (1 << 26))){
66 fprintf(stderr,"viterbi27: CPU does not support SSE2 instructions\n");
67 exit(1);
68 }
69 #elif defined(SSE)
70 if(!(cpu_features() & (1 << 25))){
71 fprintf(stderr,"viterbi27: CPU does not support SSE instructions\n");
72 exit(1);
73 }
74 #elif defined(MMX)
75 if(!(cpu_features() & (1 << 23))){
76 fprintf(stderr,"viterbi27: CPU does not support MMX instructions\n");
77 exit(1);
78 }
79 #endif
80 /* Initialize metric tables */
81 for(state=0;state < 32;state++){
82 #ifdef MMX
83 int symbol;
84 for(symbol = 0;symbol < 16;symbol++){
85 Mettab27_1[symbol][state] = parity((2*state) & V27POLYA) ? (15-symbol):symbol;
86 Mettab27_2[symbol][state] = parity((2*state) & V27POLYB) ? (15-symbol):symbol;
87 }
88 #else
89 Branchtab27_1[state] = parity((2*state) & V27POLYA) ? 15:0;
90 Branchtab27_2[state] = parity((2*state) & V27POLYB) ? 15:0;
91 #endif
92 }
93 V27_init = 1;
94 }
95 /* Malloc only guarantees 8-byte alignment, but we want to ensure that
96 * the path metric arrays are on 32-byte boundaries. At least 16-byte
97 * alignment is mandatory in the SSE2 version, but the Pentium III
98 * cache line size is 32 bytes
99 */
100 blk = malloc(sizeof(struct v27)+32);
101 if((int)blk & 31){
102 /* Not on 32-byte boundary; shift up */
103 vp = (struct v27 *)(((int)blk + 32) & ~31);
104 } else {
105 vp = (struct v27 *)blk;
106 }
107 vp->alloc_blk = blk; /* Record original pointer from malloc for use by free() */
108
109 /* The decisions only need be 32-bit aligned */
110 #if defined(MMX)
111 vp->dp = vp->decisions = malloc((len+6)*64);
112 #else
113 vp->dp = vp->decisions = malloc((len+6)*8);
114 #endif
115
116 vp->old_metrics = vp->metrics1;
117 vp->new_metrics = vp->metrics2;
118 return vp;
119 }
120
121 /* Initialize Viterbi decoder for start of new frame */
init_viterbi27(void * p,int starting_state)122 int init_viterbi27(void *p,int starting_state){
123 struct v27 *vp = p;
124
125 memset(vp->metrics1,60,64);
126 vp->old_metrics = vp->metrics1;
127 vp->new_metrics = vp->metrics2;
128 vp->dp = vp->decisions;
129 vp->old_metrics[starting_state & 63] = 0; /* Bias known start state */
130 return 0;
131 }
132
133 /* Do Viterbi chainback */
chainback_viterbi27(void * p,unsigned char * data,unsigned int nbits,unsigned int endstate)134 int chainback_viterbi27(
135 void *p,
136 unsigned char *data, /* Decoded output data */
137 unsigned int nbits, /* Number of data bits */
138 unsigned int endstate){ /* Terminal encoder state */
139
140 struct v27 *vp = p;
141 int k;
142 decision_t *decisions = (decision_t *)(vp->decisions);
143
144 /* Make room beyond the end of the encoder register so we can
145 * accumulate a full byte of decoded data
146 */
147 endstate %= 64;
148 endstate <<= 2;
149 decisions += 6; /* Look past tail */
150
151 while(nbits-- != 0){
152 k = EXTRACT_DECISION(&decisions[nbits],endstate >> 2);
153 /* The store into data[] only needs to be done every 8 bits.
154 * But this avoids a conditional branch, and the writes will
155 * combine in the cache anyway
156 */
157 data[nbits>>3] = endstate = (endstate >> 1) | (k << 7);
158 }
159 return 0;
160 }
161
162 /* Delete instance of a Viterbi decoder */
delete_viterbi27(void * p)163 void delete_viterbi27(void *p){
164 struct v27 *vp = p;
165
166 if(vp != NULL){
167 free(vp->decisions);
168 free(vp->alloc_blk);
169 }
170 }
171 #if !defined(MMX) && !defined(SSE) & !defined(SSE2)
172 /* Portable C version */
173 /* C-language butterfly */
174 #define BFLY(i) {\
175 unsigned char metric,m0,m1,decision;\
176 metric = ((Branchtab27_1[i] ^ sym1) + (Branchtab27_2[i] ^ sym2) + 1)/2;\
177 m0 = vp->old_metrics[i] + metric;\
178 m1 = vp->old_metrics[i+32] + (15 - metric);\
179 decision = (m0-m1) >= 0;\
180 vp->new_metrics[2*i] = decision ? m1 : m0;\
181 vp->dp->w[i/16] |= decision << ((2*i)&31);\
182 m0 -= (metric+metric-15);\
183 m1 += (metric+metric-15);\
184 decision = (m0-m1) >= 0;\
185 vp->new_metrics[2*i+1] = decision ? m1 : m0;\
186 vp->dp->w[i/16] |= decision << ((2*i+1)&31);\
187 }
188
update_viterbi27(void * p,unsigned char sym1,unsigned char sym2)189 int update_viterbi27(void *p,unsigned char sym1,unsigned char sym2){
190 struct v27 *vp = p;
191 unsigned char *tmp;
192 int normalize = 0;
193
194 vp->dp->w[0] = vp->dp->w[1] = 0;
195
196 BFLY(0);
197 BFLY(1);
198 BFLY(2);
199 BFLY(3);
200 BFLY(4);
201 BFLY(5);
202 BFLY(6);
203 BFLY(7);
204 BFLY(8);
205 BFLY(9);
206 BFLY(10);
207 BFLY(11);
208 BFLY(12);
209 BFLY(13);
210 BFLY(14);
211 BFLY(15);
212 BFLY(16);
213 BFLY(17);
214 BFLY(18);
215 BFLY(19);
216 BFLY(20);
217 BFLY(21);
218 BFLY(22);
219 BFLY(23);
220 BFLY(24);
221 BFLY(25);
222 BFLY(26);
223 BFLY(27);
224 BFLY(28);
225 BFLY(29);
226 BFLY(30);
227 BFLY(31);
228
229 /* Renormalize metrics */
230 if(vp->new_metrics[0] > 150){
231 int i;
232 unsigned char minmetric = 255;
233
234 for(i=0;i<64;i++)
235 if(vp->new_metrics[i] < minmetric)
236 minmetric = vp->new_metrics[i];
237 for(i=0;i<64;i++)
238 vp->new_metrics[i] -= minmetric;
239 normalize = minmetric;
240 }
241
242 vp->dp++;
243 tmp = vp->old_metrics;
244 vp->old_metrics = vp->new_metrics;
245 vp->new_metrics = tmp;
246
247 return normalize;
248 }
249 #endif
250
emms_viterbi27(void)251 void emms_viterbi27(void){
252 #if defined(MMX) || defined(SSE)
253 asm("emms");
254 #endif
255 }
256
257 /* Dump current decoder state for debugging */
debug_viterbi27(void * p)258 void debug_viterbi27(void *p){
259 struct v27 *vp = p;
260 int i,j;
261
262 fprintf(stderr,"viterbi27 @ bit %d:\n",vp->dp - vp->decisions);
263 fprintf(stderr,"metrics:");
264 for(i=0;i<64;i++){
265 fprintf(stderr," %3d",vp->old_metrics[i]);
266 }
267 fprintf(stderr,"\n");
268 fprintf(stderr,"decisions: ");
269 for(i=0;i<64;i++){
270 j = EXTRACT_DECISION(vp->dp-1,i);
271 fprintf(stderr,"%d",j);
272 }
273 fprintf(stderr,"\n");
274 }
275