1 /* K=7 r=1/2 Viterbi decoder with optional Intel SIMD
2  * Dec 2001, Phil Karn, KA9Q
3  */
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <memory.h>
7 #include "viterbi27.h"
8 #include "parity.h"
9 
10 static int V27_init;
11 int cpu_features(void);
12 
13 #if defined(SSE2)
14 char id_viterbi27[] = "k=7 r=1/2 Viterbi decoder, SSE2 version";
15 #elif defined(SSE)
16 char id_viterbi27[] = "k=7 r=1/2 Viterbi decoder, SSE version";
17 #elif defined(MMX)
18 char id_viterbi27[] = "k=7 r=1/2 Viterbi decoder, MMX version";
19 #else
20 char id_viterbi27[] = "k=7 r=1/2 Viterbi decoder, portable C version";
21 #endif
22 
23 #if defined(MMX)
24 typedef union { long long p; char c[64]; } decision_t;
25 #define EXTRACT_DECISION(d,state) ((d)->c[(state)] & 1)
26 
27 /*
28   Branch metric lookup tables, indexed by state and input symbol
29   Mettab27_1 is for the first symbol, Mettab27_2 for the second
30   The first index is the input symbol (0-15)
31   The second index is the state number
32 */
33 unsigned char Mettab27_1[16][32] __attribute__((aligned(32)));
34 unsigned char Mettab27_2[16][32] __attribute__((aligned(32)));
35 
36 #else
37 typedef union { long long p; unsigned long w[2]; } decision_t;
38 #define EXTRACT_DECISION(d,state) (((d)->w[(state)/32] >> ((state)%32)) & 1)
39 
40 unsigned char Branchtab27_1[32] __attribute__ ((aligned(32)));
41 unsigned char Branchtab27_2[32] __attribute__ ((aligned(32)));
42 #endif
43 
44 /* State info for instance of Viterbi decoder
45  * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s!
46  */
47 struct v27 {
48   unsigned char metrics1[64]; /* path metric buffer 1 */
49   unsigned char metrics2[64]; /* path metric buffer 2 */
50   decision_t *dp;              /* Pointer to decision output for current bit */
51   unsigned char *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
52   decision_t *decisions;       /* Beginning of decisions for block */
53   void *alloc_blk;             /* Return value from malloc */
54 };
55 
56 /* Create a new instance of a Viterbi decoder */
create_viterbi27(int len)57 void *create_viterbi27(int len){
58   void *blk;
59   struct v27 *vp;
60   int state;
61 
62   if(!V27_init){
63     /* Check that CPU has required features */
64 #if defined(SSE2)
65     if(!(cpu_features() & (1 << 26))){
66       fprintf(stderr,"viterbi27: CPU does not support SSE2 instructions\n");
67       exit(1);
68     }
69 #elif defined(SSE)
70     if(!(cpu_features() & (1 << 25))){
71       fprintf(stderr,"viterbi27: CPU does not support SSE instructions\n");
72       exit(1);
73     }
74 #elif defined(MMX)
75     if(!(cpu_features() & (1 << 23))){
76       fprintf(stderr,"viterbi27: CPU does not support MMX instructions\n");
77       exit(1);
78     }
79 #endif
80     /* Initialize metric tables */
81     for(state=0;state < 32;state++){
82 #ifdef MMX
83       int symbol;
84       for(symbol = 0;symbol < 16;symbol++){
85 	Mettab27_1[symbol][state] = parity((2*state) & V27POLYA) ? (15-symbol):symbol;
86 	Mettab27_2[symbol][state] = parity((2*state) & V27POLYB) ? (15-symbol):symbol;
87       }
88 #else
89       Branchtab27_1[state] = parity((2*state) & V27POLYA) ? 15:0;
90       Branchtab27_2[state] = parity((2*state) & V27POLYB) ? 15:0;
91 #endif
92     }
93     V27_init = 1;
94   }
95   /* Malloc only guarantees 8-byte alignment, but we want to ensure that
96    * the path metric arrays are on 32-byte boundaries. At least 16-byte
97    * alignment is mandatory in the SSE2 version, but the Pentium III
98    * cache line size is 32 bytes
99    */
100   blk = malloc(sizeof(struct v27)+32);
101   if((int)blk & 31){
102     /* Not on 32-byte boundary; shift up */
103     vp = (struct v27 *)(((int)blk + 32) & ~31);
104   } else {
105     vp = (struct v27 *)blk;
106   }
107   vp->alloc_blk = blk; /* Record original pointer from malloc for use by free() */
108 
109   /* The decisions only need be 32-bit aligned */
110 #if defined(MMX)
111   vp->dp = vp->decisions = malloc((len+6)*64);
112 #else
113   vp->dp = vp->decisions = malloc((len+6)*8);
114 #endif
115 
116   vp->old_metrics = vp->metrics1;
117   vp->new_metrics = vp->metrics2;
118   return vp;
119 }
120 
121 /* Initialize Viterbi decoder for start of new frame */
init_viterbi27(void * p,int starting_state)122 int init_viterbi27(void *p,int starting_state){
123   struct v27 *vp = p;
124 
125   memset(vp->metrics1,60,64);
126   vp->old_metrics = vp->metrics1;
127   vp->new_metrics = vp->metrics2;
128   vp->dp = vp->decisions;
129   vp->old_metrics[starting_state & 63] = 0; /* Bias known start state */
130   return 0;
131 }
132 
133 /* Do Viterbi chainback */
chainback_viterbi27(void * p,unsigned char * data,unsigned int nbits,unsigned int endstate)134 int chainback_viterbi27(
135       void *p,
136       unsigned char *data, /* Decoded output data */
137       unsigned int nbits, /* Number of data bits */
138       unsigned int endstate){ /* Terminal encoder state */
139 
140   struct v27 *vp = p;
141   int k;
142   decision_t *decisions = (decision_t *)(vp->decisions);
143 
144   /* Make room beyond the end of the encoder register so we can
145    * accumulate a full byte of decoded data
146    */
147   endstate %= 64;
148   endstate <<= 2;
149   decisions += 6; /* Look past tail */
150 
151   while(nbits-- != 0){
152     k = EXTRACT_DECISION(&decisions[nbits],endstate >> 2);
153     /* The store into data[] only needs to be done every 8 bits.
154      * But this avoids a conditional branch, and the writes will
155      * combine in the cache anyway
156      */
157     data[nbits>>3] = endstate = (endstate >> 1) | (k << 7);
158   }
159   return 0;
160 }
161 
162 /* Delete instance of a Viterbi decoder */
delete_viterbi27(void * p)163 void delete_viterbi27(void *p){
164   struct v27 *vp = p;
165 
166   if(vp != NULL){
167     free(vp->decisions);
168     free(vp->alloc_blk);
169   }
170 }
171 #if !defined(MMX) && !defined(SSE) & !defined(SSE2)
172 /* Portable C version */
173 /* C-language butterfly */
174 #define BFLY(i) {\
175 unsigned char metric,m0,m1,decision;\
176     metric = ((Branchtab27_1[i] ^ sym1) + (Branchtab27_2[i] ^ sym2) + 1)/2;\
177     m0 = vp->old_metrics[i] + metric;\
178     m1 = vp->old_metrics[i+32] + (15 - metric);\
179     decision = (m0-m1) >= 0;\
180     vp->new_metrics[2*i] = decision ? m1 : m0;\
181     vp->dp->w[i/16] |= decision << ((2*i)&31);\
182     m0 -= (metric+metric-15);\
183     m1 += (metric+metric-15);\
184     decision = (m0-m1) >= 0;\
185     vp->new_metrics[2*i+1] = decision ? m1 : m0;\
186     vp->dp->w[i/16] |= decision << ((2*i+1)&31);\
187 }
188 
update_viterbi27(void * p,unsigned char sym1,unsigned char sym2)189 int update_viterbi27(void *p,unsigned char sym1,unsigned char sym2){
190   struct v27 *vp = p;
191   unsigned char *tmp;
192   int normalize = 0;
193 
194   vp->dp->w[0] = vp->dp->w[1] = 0;
195 
196   BFLY(0);
197   BFLY(1);
198   BFLY(2);
199   BFLY(3);
200   BFLY(4);
201   BFLY(5);
202   BFLY(6);
203   BFLY(7);
204   BFLY(8);
205   BFLY(9);
206   BFLY(10);
207   BFLY(11);
208   BFLY(12);
209   BFLY(13);
210   BFLY(14);
211   BFLY(15);
212   BFLY(16);
213   BFLY(17);
214   BFLY(18);
215   BFLY(19);
216   BFLY(20);
217   BFLY(21);
218   BFLY(22);
219   BFLY(23);
220   BFLY(24);
221   BFLY(25);
222   BFLY(26);
223   BFLY(27);
224   BFLY(28);
225   BFLY(29);
226   BFLY(30);
227   BFLY(31);
228 
229   /* Renormalize metrics */
230   if(vp->new_metrics[0] > 150){
231     int i;
232     unsigned char minmetric = 255;
233 
234     for(i=0;i<64;i++)
235       if(vp->new_metrics[i] < minmetric)
236 	minmetric = vp->new_metrics[i];
237     for(i=0;i<64;i++)
238       vp->new_metrics[i] -= minmetric;
239     normalize = minmetric;
240   }
241 
242   vp->dp++;
243   tmp = vp->old_metrics;
244   vp->old_metrics = vp->new_metrics;
245   vp->new_metrics = tmp;
246 
247   return normalize;
248 }
249 #endif
250 
emms_viterbi27(void)251 void emms_viterbi27(void){
252 #if defined(MMX) || defined(SSE)
253   asm("emms");
254 #endif
255 }
256 
257 /* Dump current decoder state for debugging */
debug_viterbi27(void * p)258 void debug_viterbi27(void *p){
259   struct v27 *vp = p;
260   int i,j;
261 
262   fprintf(stderr,"viterbi27 @ bit %d:\n",vp->dp - vp->decisions);
263   fprintf(stderr,"metrics:");
264   for(i=0;i<64;i++){
265     fprintf(stderr," %3d",vp->old_metrics[i]);
266   }
267   fprintf(stderr,"\n");
268   fprintf(stderr,"decisions: ");
269   for(i=0;i<64;i++){
270     j = EXTRACT_DECISION(vp->dp-1,i);
271     fprintf(stderr,"%d",j);
272   }
273   fprintf(stderr,"\n");
274 }
275