1/* Intel SIMD MMX implementation of Viterbi ACS butterflies
2   for 64-state (k=7) convolutional code
3   Copyright 2004 Phil Karn, KA9Q
4   This code may be used under the terms of the GNU Lesser General Public License (LGPL)
5
6   int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits) ;
7*/
8	# MMX (64-bit SIMD) version
9	# requires Pentium-MMX, Pentium-II or better
10
11	# These are offsets into struct v27, defined in viterbi27_mmx.c
12	.set DP,128
13	.set OLDMETRICS,132
14	.set NEWMETRICS,136
15	.text
16	.global update_viterbi27_blk_mmx,Mettab27_1,Mettab27_2
17	.type update_viterbi27_blk_mmx,@function
18	.align 16
19
/*
 * int update_viterbi27_blk_mmx(struct v27 *vp, unsigned char *syms, int nbits)
 *
 * i386, AT&T syntax, arguments on the stack:
 *    8(%ebp) = vp      12(%ebp) = syms      16(%ebp) = nbits
 * The syms and nbits stack slots are updated in place as the loop runs.
 *
 * Each loop iteration consumes two symbol bytes, runs 32 butterflies
 * (4 macro invocations, 8 byte lanes each), writes 64 decision bytes and
 * 64 new metric bytes, then swaps the old/new metric pointers.
 *
 * Returns 0 in %eax after the loop finishes, or -1 when vp is NULL.
 * On the normal path the OLDMETRICS, NEWMETRICS and DP fields of *vp are
 * written back with the current pointer values.
 *
 * Register roles inside the loop:
 *    %esi -> old metrics    %edi -> new metrics    %edx -> decision output
 *    %eax = first symbol * 32, %ebx = second symbol * 32 (Mettab row offsets)
 *    %mm0-%mm7 are scratch; emms runs before the normal return.
 * %ebp, %esi, %edi, %edx and %ebx are saved on entry and restored on exit.
 */
update_viterbi27_blk_mmx:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %edx
	pushl %ebx

	movl 8(%ebp),%edx	# edx = vp
	testl %edx,%edx
	jnz  0f
	movl $-1,%eax		# NULL vp: return -1 (immediate operand, not a memory load)
	jmp  err
0:	movl OLDMETRICS(%edx),%esi	# esi -> old metrics
	movl NEWMETRICS(%edx),%edi	# edi -> new metrics
	movl DP(%edx),%edx	# edx -> decisions

1:	movl 16(%ebp),%eax	# eax = bits still to process
	decl %eax
	jl   2f			# dropped below zero: all bits done
	movl %eax,16(%ebp)

	movl 12(%ebp),%ebx	# ebx = syms
	movzbl (%ebx),%eax	# eax = first symbol, zero-extended
	addl $2,%ebx
	movl %ebx,12(%ebp)	# syms += 2 for the next iteration
	movzbl -1(%ebx),%ebx	# ebx = second symbol, zero-extended

	# scale each symbol by 32 to form its row offset into the metric tables
	shll $5,%eax
	shll $5,%ebx

	# each invocation of this macro does 8 butterflies in parallel
	# in:  eax/ebx = table row offsets, esi/edi/edx as described above
	# out: 16 decision bytes at 16*GROUP(%edx), 16 metric bytes at 16*GROUP(%edi)
	# clobbers mm0-mm7
	.macro butterfly GROUP
	# Compute branch metrics: mm3 = ((tab1 + tab2 + 1) >> 1) & 15 per byte
	movq (Mettab27_1+8*\GROUP)(%eax),%mm3
	movq fifteens,%mm0

	paddb (Mettab27_2+8*\GROUP)(%ebx),%mm3
	paddb ones,%mm3  # emulate pavgb - this may not be necessary
	psrlq $1,%mm3
	pand %mm0,%mm3		# drop bits shifted in from the neighbouring byte

	movq (8*\GROUP)(%esi),%mm6	# Incoming path metric, high bit = 0
	movq ((8*\GROUP)+32)(%esi),%mm2 # Incoming path metric, high bit = 1
	movq %mm6,%mm1
	movq %mm2,%mm7

	paddb %mm3,%mm6
	paddb %mm3,%mm2
	pxor  %mm0,%mm3		 # invert branch metric (15 - metric)
	paddb %mm3,%mm7		 # path metric for inverted symbols
	paddb %mm3,%mm1

	# live registers 1 2 6 7
	# Compare mm6 and mm7;  mm1 and mm2
	pxor %mm3,%mm3		# mm3 = 0 for the signed compares below
	movq %mm6,%mm4
	movq %mm1,%mm5
	psubb %mm7,%mm4		# mm4 = mm6 - mm7
	psubb %mm2,%mm5		# mm5 = mm1 - mm2
	pcmpgtb %mm3,%mm4	# mm4 = first set of decisions (ff where mm6 - mm7 > 0)
	pcmpgtb %mm3,%mm5	# mm5 = second set of decisions (ff where mm1 - mm2 > 0)

	# live registers 1 2 4 5 6 7
	# select survivors: decision ff keeps mm7 / mm2, decision 00 keeps mm6 / mm1
	movq %mm4,%mm0
	pand %mm4,%mm7
	movq %mm5,%mm3
	pand %mm5,%mm2
	pandn %mm6,%mm0
	pandn %mm1,%mm3
	por %mm0,%mm7		# mm7 = first set of survivors
	por %mm3,%mm2		# mm2 = second set of survivors

	# live registers 2 4 5 7
	# interleave the two decision sets (mm4, mm5) and the two
	# survivor metric sets (mm7, mm2) byte by byte, then store them
	movq %mm4,%mm3
	movq %mm7,%mm0
	punpckhbw %mm5,%mm4	# high four bytes of each decision set
	punpcklbw %mm5,%mm3	# low four bytes of each decision set
	punpcklbw %mm2,%mm7	# low four bytes of each survivor set
	punpckhbw %mm2,%mm0	# high four bytes of each survivor set
	movq %mm4,(16*\GROUP+8)(%edx)
	movq %mm3,(16*\GROUP)(%edx)
	movq %mm7,(16*\GROUP)(%edi)
	movq %mm0,(16*\GROUP+8)(%edi)

	.endm

	# invoke macro 4 times for a total of 32 butterflies
	butterfly GROUP=0
	butterfly GROUP=1
	butterfly GROUP=2
	butterfly GROUP=3

	addl $64,%edx		# bump decision pointer past the 64 bytes just written

	# swap metrics: the new metrics become the old ones for the next bit
	movl %esi,%eax
	movl %edi,%esi
	movl %eax,%edi
	jmp 1b

2:	emms			# leave MMX state before returning
	movl 8(%ebp),%ebx	# ebx = vp
	# stash metric pointers
	movl %esi,OLDMETRICS(%ebx)
	movl %edi,NEWMETRICS(%ebx)
	movl %edx,DP(%ebx)	# stash incremented value of vp->dp
	xorl %eax,%eax		# return 0
err:	popl %ebx		# common exit; eax already holds the return value
	popl %edx
	popl %edi
	popl %esi
	popl %ebp
	ret
	.size update_viterbi27_blk_mmx, .-update_viterbi27_blk_mmx
141
142	.data
143	.align 8
144fifteens:
145	.byte 15,15,15,15,15,15,15,15
146
147	.align 8
148ones:	.byte 1,1,1,1,1,1,1,1
149