1	#include "asmoff.h"
2.section .note.GNU-stack, "", @progbits
3
/*
 * dv_decode_vlc -- table-driven decode of one VLC code word.
 *
 * ABI:   System V AMD64
 * In:    %rdi = bits     (bit window, left aligned)
 *        %rsi = maxbits  (number of valid bits in the window)
 *        %rdx = result   (address that receives the 32-bit record)
 *        NOTE(review): presumably the C prototype is
 *        void dv_decode_vlc(int bits, int maxbits, dv_vlc_t *result) -- confirm.
 * Out:   32-bit record stored at (%rdx), laid out as
 *            bits  0-7   run
 *            bits  8-15  len
 *            bits 16-31  amp
 * Clobb: %rax, %rcx, %r11, flags (%rbx and %rbp are saved and restored)
 */
.text
	.align 4
.globl dv_decode_vlc
	.type	 dv_decode_vlc,@function
dv_decode_vlc:
	pushq	%rbx
	pushq	%rbp

	movq	%rdi,%rax		/* %rax = bits */
	movq	%rsi,%rbx
	andq	$0x3f,%rbx		/* %rbx = maxbits clamped to 0..63 for table indexing */

	/*
	 * klass = dv_vlc_classes[maxbits]
	 *             [(bits & dv_vlc_class_index_mask[maxbits])
	 *                   >> dv_vlc_class_index_rshift[maxbits]];
	 */
	movq	dv_vlc_class_index_rshift@GOTPCREL(%rip),%rcx
	movq	dv_vlc_class_index_mask@GOTPCREL(%rip),%r11
	movl	(%rcx,%rbx,4),%ecx	/* shift count (int32 table) */
	movl	(%r11,%rbx,4),%ebp	/* mask        (int32 table) */
	andl	%eax,%ebp
	sarl	%cl,%ebp		/* index into the class row; upper half of %rbp is zeroed */
	movq	dv_vlc_classes@GOTPCREL(%rip),%rcx
	movq	(%rcx,%rbx,8),%rcx	/* row pointer dv_vlc_classes[maxbits] */
	movsbq	(%rcx,%rbp,1),%rbp	/* %rbp = klass (int8, sign extended) */

	/*
	 * record = dv_vlc_lookups[klass]
	 *              [(bits & dv_vlc_index_mask[klass])
	 *                    >> dv_vlc_index_rshift[klass]];
	 */
	movq	dv_vlc_index_rshift@GOTPCREL(%rip),%r11
	movl	(%r11,%rbp,4),%ecx	/* shift count (int32 table) */
	movq	dv_vlc_index_mask@GOTPCREL(%rip),%r11
	movl	(%r11,%rbp,4),%ebx	/* mask        (int32 table) */
	andl	%eax,%ebx
	sarl	%cl,%ebx		/* %rbx = index into the lookup row */
	movq	dv_vlc_lookups@GOTPCREL(%rip),%r11
	movq	(%r11,%rbp,8),%rbp	/* row pointer dv_vlc_lookups[klass] */
	movl	(%rbp,%rbx,4),%ebp	/* %ebp = 32-bit record */

	/*
	 * Conditional negate of the amp field, done without branches:
	 *     if (amp is non-negative && (bits & sign_mask[len]))
	 *         amp = -amp;
	 */
	movl	%ebp,%ecx
	shrl	$8,%ecx
	movzbl	%cl,%ecx		/* %ecx = len field; stays live until the maxbits check */
	movq	sign_mask@GOTPCREL(%rip),%rbx
	andl	(%rbx,%rcx,4),%eax	/* bits & sign_mask[len] (int32 table) */
	negl	%eax
	sarl	$31,%eax		/* all ones when the sign bit was set, else zero */

	movl	%ebp,%ebx
	sarl	$31,%ebx		/* replicate bit 31 of the record (sign of amp) */
	notl	%ebx
	andl	$0xffff0000,%ebx	/* 0xffff0000 when amp is non-negative, else zero */

	andl	%ebx,%eax		/* %eax = 0xffff0000 to negate amp, 0 to leave it */

	/* Two's complement negate confined to bits 16-31: flip them, then add 0x10000. */
	xorl	%eax,%ebp
	subl	%eax,%ebp

	/*
	 * if (maxbits < len)
	 *     record = broken;		(the broken pattern is 0xffffffff)
	 */
	cmpl	%ecx,%esi		/* carry set when maxbits is below len (unsigned) */
	sbbl	%ebx,%ebx		/* %ebx = carry ? 0xffffffff : 0 */
	orl	%ebx,%ebp

	movl	%ebp,(%rdx)		/* *result = record */

	popq	%rbp
	popq	%rbx

	ret
95
/*
 * void __dv_decode_vlc(int bits, dv_vlc_t *result)
 *
 * Decode one VLC code word from a 16-bit window, using
 * dv_vlc_class_lookup5 to pick the class from the top 7 bits.
 *
 * ABI:   System V AMD64
 * In:    %rdi = bits, %rsi = result
 * Out:   32-bit record stored at (%rsi), laid out as
 *            bits  0-7   run
 *            bits  8-15  len
 *            bits 16-31  amp
 * Clobb: %rax, %rcx, %r11, flags (%rbx and %rbp are saved and restored)
 */

.text
	.align 4
.globl __dv_decode_vlc
	.type	 __dv_decode_vlc,@function
__dv_decode_vlc:
	pushq	%rbx
	pushq	%rbp

	movq	%rdi,%rax		/* %rax = bits */

	/* klass = dv_vlc_class_lookup5[(bits & 0xfe00) >> 9]; */
	movl	%eax,%ebp
	andl	$0xfe00,%ebp
	shrl	$9,%ebp
	movq	dv_vlc_class_lookup5@GOTPCREL(%rip),%r11
	movsbq	(%r11,%rbp),%rbp	/* %rbp = klass (int8, sign extended) */

	/* index = (bits & dv_vlc_index_mask[klass]) >> dv_vlc_index_rshift[klass]; */
	movq	dv_vlc_index_rshift@GOTPCREL(%rip),%rcx
	movq	dv_vlc_index_mask@GOTPCREL(%rip),%rbx
	movl	(%rcx,%rbp,4),%ecx	/* shift count (int32 table) */
	movl	(%rbx,%rbp,4),%ebx	/* mask        (int32 table) */
	andl	%eax,%ebx
	sarl	%cl,%ebx		/* %rbx = index into the lookup row */

	/* record = dv_vlc_lookups[klass][index]; */
	movq	dv_vlc_lookups@GOTPCREL(%rip),%r11
	movq	(%r11,%rbp,8),%rbp	/* row pointer */
	movl	(%rbp,%rbx,4),%ebp	/* %ebp = 32-bit record */

	/*
	 * Conditional negate of the amp field, done without branches:
	 *     if (amp is non-negative && (bits & sign_mask[len]))
	 *         amp = -amp;
	 */
	movl	%ebp,%ecx
	shrl	$8,%ecx
	movzbl	%cl,%ecx		/* %ecx = len field */
	movq	sign_mask@GOTPCREL(%rip),%r11
	andl	(%r11,%rcx,4),%eax	/* bits & sign_mask[len] (int32 table) */
	negl	%eax
	sarl	$31,%eax		/* all ones when the sign bit was set, else zero */

	movl	%ebp,%ebx
	sarl	$31,%ebx		/* replicate bit 31 of the record (sign of amp) */
	notl	%ebx
	andl	$0xffff0000,%ebx	/* 0xffff0000 when amp is non-negative, else zero */

	andl	%ebx,%eax		/* %eax = 0xffff0000 to negate amp, 0 to leave it */

	/* Two's complement negate confined to bits 16-31. */
	xorl	%eax,%ebp
	subl	%eax,%ebp

	movl	%ebp,(%rsi)		/* *result = record */

	popq	%rbp
	popq	%rbx

	ret
165
/*
 * void dv_parse_ac_coeffs_pass0(bitstream_t *bs,
 *                               dv_macroblock_t *mb,
 *                               dv_block_t *bl)
 *
 * Clears bl->coeffs[1..63], then decodes VLC terms starting at bit
 * bl->offset and stores each amplitude at the position given by the
 * bl->reorder table.  The loop ends at the first term whose run field
 * is negative: an end-of-block term (amp == 0) marks the block done and
 * bumps mb->eob_count; a term with len == 0xfe sets mb->vlc_error.
 * bl->reorder and bl->offset are written back on exit.
 *
 * ABI:   System V AMD64
 * In:    %rdi = bs, %rsi = mb, %rdx = bl
 * Clobb: %rax, %rcx, %r10, %r11, %mm0, %mm1, flags
 *        (%r12-%r15 are saved and restored; no emms is issued here)
 * Stack: the decoded record of the slow path lives in a stack slot, so
 *        the routine keeps no state in static storage.
 */
.text
	.align	4
.globl	dv_parse_ac_coeffs_pass0
.type	dv_parse_ac_coeffs_pass0,@function

dv_parse_ac_coeffs_pass0:

	push	%r12
	push	%r13
	push	%r14
	push	%r15

	/*
	 * Register roles for the whole routine:
	 *   rax, rcx, r10, r11   scratch
	 *   r12                  bl->reorder (spilled to bl during the inlined decode)
	 *   r13                  bl->offset, in bits
	 *   r14                  bs->buf
	 *   r15                  bl
	 *   rsi                  mb (only needed on the escape path)
	 */
	mov	%rdx,%r15                      /* bl */
	mov	bitstream_t_buf(%rdi),%r14     /* bs->buf */
	mov	dv_block_t_offset(%r15),%r13d  /* bl->offset (int32, zero extended) */
	mov	dv_block_t_reorder(%r15),%r12  /* bl->reorder (ptr) */

	/* Keep bl->coeffs[0] (the DC term) and zero the other three words
	   of the first quadword. */
	movq    dv_block_t_coeffs(%r15),%mm1
	pxor    %mm0,%mm0
	pand    const_f_0_0_0(%rip),%mm1
	movq    %mm1,dv_block_t_coeffs(%r15)

	/* memset(&bl->coeffs[1],'\0',sizeof(bl->coeffs)-sizeof(bl->coeffs[0])); */
	movq    %mm0,(dv_block_t_coeffs + 8)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 16)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 24)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 32)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 40)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 48)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 56)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 64)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 72)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 80)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 88)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 96)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 104)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 112)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 120)(%r15)

.Lreadloop:
	/* bits = bitstream_show(bs,16); */
	mov	%r13,%rcx             /* bl->offset */
	shr	$3,%rcx               /* byte index = offset / 8 */
	movzbq	(%r14,%rcx,1),%rax    /* buf[byte]     */
	movzbq	1(%r14,%rcx,1),%r11   /* buf[byte + 1] */
	movzbq	2(%r14,%rcx,1),%rcx   /* buf[byte + 2] */
	shl	$16,%rax
	shl	$8,%r11
	or	%rcx,%rax
	or	%r11,%rax             /* rax = the three bytes, big endian */
	mov	%r13,%r11
	and	$7,%r11               /* bit position inside the first byte */
	mov	$8,%rcx
	sub	%r11,%rcx             /* 8 - bit position */
	shr	%cl,%rax              /* the 16 wanted bits now sit in bits 0-15 */

	/* bits_left = bl->end - bl->offset; */
	mov	dv_block_t_end(%r15),%r11d
	sub	%r13d,%r11d           /* r11d = bits_left */

	/* if (bits_left < 16) use the general decoder */
	cmp	$16,%r11d
	jl	.Lslowpath

	/* rcx = most significant 7 bits of the 16-bit window */
	mov	%rax,%rcx
	and	$0xfe00,%rcx
	sar	$9,%rcx

	/* Try the shortcut table first.  A record with a non-negative
	   run field (bit 7 clear) is already fully decoded. */
	mov	dv_vlc_class1_shortcut@GOTPCREL(%rip),%r10
	mov	(%r10,%rcx,4),%r11d    /* 32-bit record */
	test	$0x80,%r11d
	je	.Ldone_decode

	/* ---- inlined __dv_decode_vlc ---------------------------------- */
	/* r12 is needed as scratch, so park bl->reorder in the block. */
	mov	%r12,dv_block_t_reorder(%r15)

	/* rax = bits, rcx = top 7 bits */
	mov	dv_vlc_class_lookup5@GOTPCREL(%rip),%r10
	movsbq	(%r10,%rcx,1),%rcx     /* klass (int8) */

	mov	dv_vlc_index_mask@GOTPCREL(%rip),%r10
	mov	(%r10,%rcx,4),%r12d    /* dv_vlc_index_mask[klass] (int32) */

	mov	dv_vlc_lookups@GOTPCREL(%rip),%r10
	mov	(%r10,%rcx,8),%r11     /* dv_vlc_lookups[klass] (ptr) */

	mov	dv_vlc_index_rshift@GOTPCREL(%rip),%r10
	mov	(%r10,%rcx,4),%ecx     /* dv_vlc_index_rshift[klass] (int32) */

	and	%eax,%r12d
	sar	%cl,%r12d              /* index into the lookup row */

	mov	(%r11,%r12,4),%r11d    /* 32-bit record */

	/* r11d now holds the record:
	   bits 0-7   run
	   bits 8-15  len
	   bits 16-31 amp
	*/
	test	$0x80,%r11d            /* if (vlc.run < 0) break */
	jne	.Lescape1

	/*
	   if (amp is non-negative && (bits & sign_mask[len]))
	       amp = -amp;
	   done without branches: eax becomes 0xffff0000 when amp must be
	   negated and 0 otherwise.
	*/
	mov	%r11d,%ecx
	sar	$8,%ecx
	and	$0xff,%ecx             /* len */
	mov	sign_mask@GOTPCREL(%rip),%r10
	mov	(%r10,%rcx,4),%ecx     /* sign_mask[len] (int32) */
	and	%ecx,%eax
	neg	%eax
	sar	$31,%eax               /* all ones when the sign bit was set */

	mov	%r11d,%r12d
	sar	$31,%r12d
	xor	$0xffffffff,%r12d
	and	$0xffff0000,%r12d      /* 0xffff0000 when amp is non-negative */
	and	%r12d,%eax

	xor	%eax,%r11d             /* negate bits 16-31: flip ... */
	sub	%eax,%r11d             /* ... and add 0x10000 */

	mov	dv_block_t_reorder(%r15),%r12    /* reload the parked pointer */
	/* ---- end of inlined __dv_decode_vlc ---------------------------- */

.Ldone_decode:
	/* bl->offset += vlc.len */
	mov	%r11d,%eax
	shr	$8,%eax
	and	$255,%eax
	add	%eax,%r13d

	/* bl->reorder += vlc.run */
	mov	%r11d,%eax
	and	$255,%eax
	add	%rax,%r12

	/* SET_COEFF(bl->coeffs, bl->reorder, vlc.amp);
	   the reorder byte is used as a byte offset into bl->coeffs */
	movzbq	(%r12),%rax
	inc	%r12

	shr	$16,%r11d
	movw	%r11w,(dv_block_t_coeffs)(%r15,%rax,1)   /* int16 */

	jmp	.Lreadloop

.Lescape1:
	/* Left the inlined decode early: reload the parked bl->reorder. */
	mov	dv_block_t_reorder(%r15),%r12
.Lescape:
	/* if (vlc.amp == 0) */
	test	$0xffff0000,%r11d
	jne	.Lampnonzero
	/* bl->reorder = bl->reorder_sentinel; */
	mov	dv_block_t_reorder_sentinel(%r15),%r12  /* ptr */
	/* bl->offset += 4; */
	add	$4,%r13d
	/* bl->eob = 1; */
	movl	$1,dv_block_t_eob(%r15)           /* int32 */
	/* mb->eob_count++; */
	mov	%rsi,%r11
	incl	dv_macroblock_t_eob_count(%r11)   /* int32 */

	jmp	.Lalldone

	/* else if (vlc.len == VLC_ERROR) */
.Lampnonzero:
	and	$0x0000ff00,%r11d
	cmp	$0x0000fe00,%r11d	/* VLC_ERROR in the len field */
	jne	.Lalldone
	/* mb->vlc_error = TRUE; */
	mov	%rsi,%r11
	movl	$1,dv_macroblock_t_vlc_error(%r11) /* int32 */
.Lalldone:
	mov	%r12,dv_block_t_reorder(%r15)   /* ptr */
	mov	%r13d,dv_block_t_offset(%r15)   /* int32 */

	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12

	ret

.Lslowpath:
	/*
	 * Fewer than 16 bits remain: call dv_decode_vlc(bits, bits_left, &vlc).
	 * Here rax = bits and r11d = bits_left.  The record is returned in a
	 * stack slot.  Three pushes plus the 16-byte slot keep %rsp 16-byte
	 * aligned at the call (four registers were pushed at entry).
	 */
	push	%rdi
	push	%rsi
	push	%rdx
	sub	$16,%rsp
	mov	%rax,%rdi        /* arg 1: bits */
	mov	%r11,%rsi        /* arg 2: bits_left */
	mov	%rsp,%rdx        /* arg 3: &vlc (stack slot) */
	mov	dv_decode_vlc@GOTPCREL(%rip),%r11
	call	*%r11
	mov	(%rsp),%r11d     /* r11d = decoded 32-bit record */
	add	$16,%rsp
	pop	%rdx
	pop	%rsi
	pop	%rdi

	test	$0x80,%r11d	/* if (vlc.run < 0) break */
	jne	.Lescape

	jmp	.Ldone_decode

	.size	dv_parse_ac_coeffs_pass0, . - dv_parse_ac_coeffs_pass0
405
/*
 * show16 -- file-local helper, not referenced by any code in this file.
 *
 * Same computation as the inline bit fetch at the top of the decode
 * loop in dv_parse_ac_coeffs_pass0: read the three bytes that cover the
 * requested bit position and shift the wanted 16 bits down to bits 0-15.
 * Private register convention (not the C ABI):
 *
 * In:    %rbx = bit offset into the buffer
 *        %r14 = buffer base address
 * Out:   %rax = the three bytes, big endian, shifted right by
 *               (8 - (offset & 7))
 * Clobb: %rbx, %rcx, %r11, flags
 */
show16:
	mov	%rbx,%rcx
	mov	%rbx,%r11
	shr	$3,%rcx               /* byte index = offset / 8 */
	and	$7,%r11               /* bit position inside the first byte */
	/* Load single bytes, zero extended.  Quadword loads here would
	   pull five unrelated bytes into each term. */
	movzbq	(%r14,%rcx,1),%rax
	movzbq	1(%r14,%rcx,1),%rbx
	movzbq	2(%r14,%rcx,1),%rcx
	shl	$16,%rax
	shl	$8,%rbx
	or	%rcx,%rax
	or	%rbx,%rax             /* rax = the three bytes, big endian */
	mov	$8,%rcx
	sub	%r11,%rcx             /* 8 - bit position */
	shr	%cl,%rax
	ret
422
423
/*
 * gint dv_parse_video_segment(dv_videosegment_t *seg, guint quality)
 *
 * For each of the 5 macroblocks of the segment: fill in qno, i, j, k and
 * clear vlc_error / eob_count; then for each of its blocks (6 when
 * quality has DV_QUALITY_COLOR set, otherwise 4) decode the DC term,
 * class_no and dct_mode, set up reorder / offset / end, and either run
 * dv_parse_ac_coeffs_pass0 on the block (quality & DV_QUALITY_AC_MASK
 * non-zero) or zero its AC coefficients.
 *
 * ABI:   System V AMD64
 * In:    %rdi = seg, %esi = quality (32-bit value; the upper half of
 *        %rsi is not relied on)
 * Out:   %eax = 0, or whatever dv_parse_ac_coeffs returns when
 *        (quality & DV_QUALITY_AC_MASK) == DV_QUALITY_AC_2 (tail call
 *        with %rdi = seg)
 * Clobb: %rax, %rcx, %r11, %mm0, %mm1, flags, plus whatever the callees
 *        clobber (%r12-%r15 are saved and restored; %rdx, %rsi and %rdi
 *        are preserved around the inner call); emms is issued before
 *        returning
 * Stack: m, mb_start and n_blocks live in a 16-byte frame, so the
 *        routine keeps no state in static storage.  Four pushes plus
 *        the frame leave %rsp at 8 mod 16; the three pushes before the
 *        inner call bring it to a 16-byte boundary.
 */
	.equ	VSEG_LOCAL_M,        0	/* int32: macroblock index m */
	.equ	VSEG_LOCAL_MB_START, 4	/* int32: bit offset of macroblock m */
	.equ	VSEG_LOCAL_N_BLOCKS, 8	/* int32: 4 for monochrome, 6 for color */
	.equ	VSEG_FRAME_SIZE,     16

	.text
	.align 4
	.globl dv_parse_video_segment
	.type  dv_parse_video_segment,@function
dv_parse_video_segment:

	push	%r12
	push	%r13
	push	%r14
	push	%r15
	sub	$VSEG_FRAME_SIZE,%rsp

	/* n_blocks = (quality & DV_QUALITY_COLOR) ? 6 : 4; */
	mov	$4,%eax
	testl	$DV_QUALITY_COLOR,%esi
	jz	.Lits_mono
	mov	$6,%eax
.Lits_mono:
	mov	%eax,VSEG_LOCAL_N_BLOCKS(%rsp)    /* int32 */

	/*
	 *	r12	seg while setting up a macroblock, then block index b
	 *	r13	mb
	 *	r14	seg->bs->buf
	 *	r15	bl
	 *	rdi	seg, rsi quality (both live for the whole routine)
	 */
	mov	dv_videosegment_t_bs(%rdi),%r14   /* seg->bs */
	mov	bitstream_t_buf(%r14),%r14        /* seg->bs->buf */
	lea	dv_videosegment_t_mb(%rdi),%r13   /* &seg->mb[0] */

	xor	%eax,%eax                         /* m = 0 */
	xor	%ecx,%ecx                         /* mb_start = 0 */
.Lmacloop:
	/* On entry: eax = m, ecx = mb_start (both zero extended). */
	mov	%eax,VSEG_LOCAL_M(%rsp)           /* int32 */
	mov	%ecx,VSEG_LOCAL_MB_START(%rsp)    /* int32 */

	mov	%rdi,%r12                         /* seg */

	/* bitstream_seek_set(bs,mb_start+28); */
	/* mb->qno = bitstream_get(bs,4);  (low nibble of byte 3) */
	mov	%rcx,%r11
	shr	$3,%r11
	movzbq	3(%r14,%r11,1),%r11
	and	$0xf,%r11
	movl	%r11d,dv_macroblock_t_qno(%r13)    /* int32 */

	/* mb->vlc_error = 0;
	   mb->eob_count = 0; */
	xor	%r11d,%r11d
	movl	%r11d,dv_macroblock_t_vlc_error(%r13) /* int32 */
	movl	%r11d,dv_macroblock_t_eob_count(%r13) /* int32 */

	/* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */
	mov	dv_super_map_vertical@GOTPCREL(%rip),%r11
	movl	(%r11,%rax,4),%r11d                     /* int32 */
	movl	dv_videosegment_t_i(%r12),%ecx          /* int32 */
	add	%rcx,%r11                               /* r11 = seg->i + map[m] */

	movl	dv_videosegment_t_isPAL(%r12),%ecx      /* int32 */
	add	$-1,%rcx	/* carry out exactly when isPAL != 0 */
	sbb	%rcx,%rcx	/* rcx = isPAL ? -1 : 0 */
	and	$1,%rcx
	shl	$5,%rcx		/* rcx = (isPAL ? 32 : 0) */

	add	%r11,%rcx       /* index relative to mod_10 */
	lea	mod_10(%rip),%r11
	movzbq	(%r11,%rcx,1),%r11	/* mod_12 sits 32 bytes after mod_10 */
	movl	%r11d,dv_macroblock_t_i(%r13)         /* int32 */

	/* mb->j = dv_super_map_horizontal[m]; */
	mov	dv_super_map_horizontal@GOTPCREL(%rip),%r11
	movl	(%r11,%rax,4),%r11d                   /* int32 */
	movl	%r11d,dv_macroblock_t_j(%r13)         /* int32 */

	/* mb->k = seg->k; */
	movl	dv_videosegment_t_k(%r12),%r11d       /* int32 */
	movl	%r11d,dv_macroblock_t_k(%r13)         /* int32 */

	xor	%r12d,%r12d                      /* b = 0 */
	lea	dv_macroblock_t_b(%r13),%r15     /* bl = &mb->b[0] */

.Lblkloop:
	/*
		+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
	        |15 |   |   |   |   |   |   |   | 7 | 6 | 5 | 4 |   |   |   | 0 |
	        +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
	        |                 dc                |mde| class |               |
	        +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
	*/
	/* Fetch the 16-bit block header at byte mb_start/8 + blk_start[b]. */
	mov	VSEG_LOCAL_MB_START(%rsp),%ecx     /* int32 */
	shr	$3,%rcx
	lea	blk_start(%rip),%r11
	movzbq	(%r11,%r12),%r11        /* int8 */
	add	%rcx,%r11
	movzbq	(%r14,%r11,1),%rax	/* hi byte */
	movzbq	1(%r14,%r11,1),%rcx	/* lo byte */
	shl	$8,%rax
	or	%rcx,%rax               /* int16 */

	/* dc coefficient = bitstream_get(bs,9); if (dc > 255) dc -= 512;
	   a 16-bit arithmetic shift right by 7 does both at once */
	mov	%rax,%r11
	sarw	$7,%r11w
	movw	%r11w,dv_block_t_coeffs(%r15)     /* int16 */

	/* bl->class_no = bitstream_get(bs,2); */
	mov	%rax,%rcx
	shr	$4,%rcx
	and	$3,%rcx
	movl	%ecx,dv_block_t_class_no(%r15)    /* int32 */

	/* bl->eob = 0; */
	xor	%ecx,%ecx
	movl	%ecx,dv_block_t_eob(%r15)         /* int32 */

	/* bl->dct_mode = bitstream_get(bs,1); */
	shr	$6,%rax
	and	$1,%rax
	movl	%eax,dv_block_t_dct_mode(%r15)    /* int32 */

	/* bl->reorder = &dv_reorder[bl->dct_mode][1]; */
	shl	$6,%rax                          /* dct_mode * 64 */
	mov	dv_reorder@GOTPCREL(%rip),%rcx
	add	$1,%rcx
	add	%rcx,%rax
	mov	%rax,dv_block_t_reorder(%r15)    /* ptr */

	/* bl->reorder_sentinel = bl->reorder + 63; */
	add	$63,%rax
	mov	%rax,dv_block_t_reorder_sentinel(%r15) /* ptr */

	/* bl->offset = mb_start + dv_parse_bit_start[b]; */
	movl	VSEG_LOCAL_MB_START(%rsp),%ecx  /* int32 */
	mov	dv_parse_bit_start@GOTPCREL(%rip),%rax
	mov	(%rax,%r12,4),%eax              /* int32 */
	add	%rcx,%rax
	movl	%eax,dv_block_t_offset(%r15)    /* int32 */

	/* bl->end = mb_start + dv_parse_bit_end[b]; */
	mov	dv_parse_bit_end@GOTPCREL(%rip),%rax
	mov	(%rax,%r12,4),%eax            /* int32 */
	add	%ecx,%eax
	mov	%eax,dv_block_t_end(%r15)     /* int32 */

	/* if (quality & DV_QUALITY_AC_MASK) dv_parse_ac_coeffs_pass0(bs,mb,bl); */
	testl	$DV_QUALITY_AC_MASK,%esi
	jnz	.Ldo_ac_pass

	/* No AC pass: keep coeffs[0] and zero the remaining coefficients. */
	movq    dv_block_t_coeffs(%r15),%mm1
	pxor    %mm0,%mm0
	pand    const_f_0_0_0(%rip),%mm1
	movq    %mm1,dv_block_t_coeffs(%r15)
	movq    %mm0,(dv_block_t_coeffs + 8)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 16)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 24)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 32)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 40)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 48)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 56)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 64)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 72)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 80)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 88)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 96)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 104)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 112)(%r15)
	movq    %mm0,(dv_block_t_coeffs + 120)(%r15)
	jmp	.Ldone_ac

.Ldo_ac_pass:
	/* dv_parse_ac_coeffs_pass0(seg->bs, mb, bl);  rdi=bs, rsi=mb, rdx=bl.
	   The frame locals are not touched while these pushes are live. */
	push	%rdx
	push	%rsi
	push	%rdi
	mov	dv_videosegment_t_bs(%rdi),%rdi   /* rdi was seg, pass seg->bs */
	mov	%r13,%rsi                         /* mb */
	mov	%r15,%rdx                         /* bl */
	mov     dv_parse_ac_coeffs_pass0@GOTPCREL(%rip),%r11
	call	*%r11
	pop	%rdi
	pop	%rsi
	pop	%rdx

.Ldone_ac:

	movl	VSEG_LOCAL_N_BLOCKS(%rsp),%eax   /* int32 */
	add	$dv_block_t_size,%r15        /* point to next block */
	inc	%r12                         /* b++ */
	cmp	%eax,%r12d
	jnz	.Lblkloop

	mov	VSEG_LOCAL_M(%rsp),%eax          /* int32 */
	mov	VSEG_LOCAL_MB_START(%rsp),%ecx   /* int32 */
	add	$(8 * 80),%ecx               /* next macroblock: 80 bytes further on */
	add	$dv_macroblock_t_size,%r13   /* point to next macroblock */
	inc	%eax                         /* m++ */
	cmp	$5,%eax
	jnz	.Lmacloop

	add	$VSEG_FRAME_SIZE,%rsp
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12

	emms

	/* if ((quality & DV_QUALITY_AC_MASK) == DV_QUALITY_AC_2)
	       return dv_parse_ac_coeffs(seg);      (tail call, rdi = seg) */
	mov	%esi,%eax	            /* quality */
	and	$DV_QUALITY_AC_MASK,%eax
	cmp	$DV_QUALITY_AC_2,%eax

	jne	.Ldone
	mov	dv_parse_ac_coeffs@GOTPCREL(%rip),%r11
	jmp	*%r11

.Ldone:	xor	%eax,%eax	/* return 0 */

	ret

	.size	dv_parse_video_segment, . - dv_parse_video_segment
653
/*
 * File-local writable scratch words.  They stay in .data, in this order,
 * for the code in this file that refers to them by name.
 */
.data
vlc:
	.long	0	/* one 32-bit VLC record */
m:
	.long	0
mb_start:
	.long	0
n_blocks:
	.long	0	/* 4 for monochrome, 6 for color */

/*
 * Constant tables.  Nothing in this file writes to them, so they are
 * placed in .rodata; .popsection returns to .data afterwards.
 */
	.pushsection .rodata
blk_start:
	.byte	4,18,32,46,60,70

	/* mod tables: mod_12 must start exactly 32 bytes after mod_10,
	   because the lookup adds 32 to the index to select it */
mod_10:
	.byte	0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7
	.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0	/* 14-byte spacer: 18 + 14 = 32 */
mod_12:
	.byte	0,1,2,3,4,5,6,7,8,9,10,11,0,1,2,3,4,5,6,7,8

	/* 64-bit mask that keeps only the lowest 16-bit word of a quadword */
	.align 16
const_f_0_0_0:
	.short	0xffff,0,0,0
	.popsection
676