1// Copyright (c) 2018 Intel Corporation
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in all
11// copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19// SOFTWARE.
20
21
22.text
23
24
25.p2align 4, 0x90
26
27
28.globl mfxownpj_EncodeHuffman8x8_JPEG_16s1u_C1
29
30
// ---------------------------------------------------------------------------
// mfxownpj_EncodeHuffman8x8_JPEG_16s1u_C1
//
// Huffman-encodes one block of 64 16-bit coefficients into a byte stream.
// The first coefficient is coded as the difference from a previously stored
// value (the "DC" part); the remaining 63 are visited through the zigzag
// index table at ownTables and coded as (zero-run, size) symbols followed by
// extra bits (the "AC" part).
//
// Arguments, as the code consumes them.  The register usage matches the
// System V AMD64 convention; the descriptive names are inferred from usage
// and should be confirmed against the C prototype:
//   %rdi       pointer to the 64 16-bit coefficients
//   %rsi       base of the output byte buffer
//   %edx       output limit in bytes (zero-extended, compared with position)
//   %rcx       pointer to the 32-bit current output position; read on entry,
//              written back only on the success path
//   %r8        pointer to the 16-bit previous-DC value; overwritten with the
//              current DC, restored on the failure path
//   %r9        DC code table: 32-bit entries indexed by size category,
//              code length in bits 31..16, code bits in bits 15..0
//   stack #1   AC code table (128(%rsp) after the prologue), same entry
//              layout, indexed by (run << 4) | size
//   stack #2   bit-buffer state (136(%rsp) after the prologue): pending bits
//              at offset 0 (read with a 64-bit load, written back with a
//              32-bit store, kept left-aligned inside 24 bits) and the
//              32-bit pending-bit count at offset 8
//
// Returns %eax = 0 on success.  Returns %eax = 1 when the room checks against
// the output limit fail or when a looked-up table entry has a zero code
// length.
//
// Stack frame (72 bytes below the six saved registers):
//    0(%rsp)   coefficient pointer (%rdi)
//    8(%rsp)   output limit
//   16(%rsp)   pointer to the output position
//   24(%rsp)   pointer to the previous-DC value
//   32(%rsp)   previous-DC value read on entry (used for rollback)
//   40(%rsp)   DC table pointer
//   48(%rsp)   return address for the internal writer (LEncHuffWrite00gas_1)
//   56(%rsp)   last zigzag index the AC loop has to visit
//
// Long-lived registers:
//   %r8   output buffer base          %r12  current output position (bytes)
//   %r9   AC table                    %r11  bit accumulator, newest bits low
//   %rsi  ownTables base              %ebp  free bits left in %r11
//   %edi  zigzag index                %r13d zero-run * 16 (later | size)
//   %r14  extra bits of the symbol    %r15  Huffman code bits of the symbol
// ---------------------------------------------------------------------------
31mfxownpj_EncodeHuffman8x8_JPEG_16s1u_C1:
32
33
    // Save the callee-saved registers this routine uses.
34    push         %rbp
35
36
37    push         %rbx
38
39
40    push         %r12
41
42
43    push         %r13
44
45
46    push         %r14
47
48
49    push         %r15
50
51
52    sub          $(72), %rsp
53
54
    // Spill the register arguments into the frame; the limit in %edx is
    // zero-extended to 64 bits first.  The output base moves to %r8 because
    // %rsi is reused below as the ownTables base.
55    mov          %rdi, (%rsp)
56    mov          %rdx, %rax
57    mov          %eax, %eax
58    mov          %rax, (8)(%rsp)
59    mov          %rcx, (16)(%rsp)
60    mov          %r8, (24)(%rsp)
61    mov          %r9, (40)(%rsp)
62    mov          %rsi, %r8
63
64
    // Load the bit-buffer state: %ebp = 64 - pending count (free bits),
    // %r11 = stored bits shifted right by (24 - pending count) so that the
    // pending bits end up right-aligned.  %r12 = current output position.
    // If (position - limit) > -24 the routine leaves at once with %eax = 1,
    // before the previous-DC value has been touched.
65    movq         (136)(%rsp), %rax
66    mov          $(64), %ebp
67    lea          (8)(%rax), %rdx
68    mov          $(24), %ecx
69    sub          (%rdx), %ebp
70    sub          (%rdx), %ecx
71    lea          (%rax), %rax
72    mov          (%rax), %r11
73    mov          (16)(%rsp), %rax
74    shr          %cl, %r11
75    mov          (%rax), %eax
76    lea          ownTables(%rip), %rsi
77    mov          %rax, %r12
78    sub          (8)(%rsp), %eax
79    cmp          $(-24), %eax
80    jg           LEncHuffExitEndOfWork01gas_1
81
82
    // ---- DC part ----
    // %ecx = previous DC, %edx = current DC (both sign-extended).  The old
    // value is kept at 32(%rsp) and the stored previous-DC is overwritten
    // with the current one right away.  Then:
    //   %edx = |diff|,  %ebx = diff - (diff < 0 ? 1 : 0)
    // |diff| below 256 is mapped to its bit length through the table at
    // ownTables+64; larger values take the out-of-line path
    // LEncHuffDcLong00gas_1.
83    mov          (24)(%rsp), %rax
84    mov          (%rsp), %rdx
85    movswl       (%rax), %ecx
86    movswl       (%rdx), %edx
87    mov          %ecx, (32)(%rsp)
88    mov          %dx, (%rax)
89    sub          %ecx, %edx
90    xor          %ecx, %ecx
91    mov          %edx, %ebx
92    neg          %edx
93    setg         %cl
94    cmovl        %ebx, %edx
95    sub          %ecx, %ebx
96    mov          $(32), %ecx
97    cmp          $(256), %edx
98    jge          LEncHuffDcLong00gas_1
99    movzbl       (64)(%rsi,%rdx), %edx
100LEncHuffDc00gas_1:
    // %edx = size category.  Shifting left then right by (32 - size) keeps
    // only the low `size` bits of %ebx as the extra bits (-> %r14d).
    // The DC table entry for this size is split into code length (%ecx) and
    // code bits (%r15); a zero length takes the failure exit.  Code and
    // extra bits are then appended to %r11 and %ebp is reduced accordingly.
    // NOTE(review): unlike the AC path, no free-bit check or writer call is
    // made before these shifts; presumably the incoming pending-bit count is
    // always small enough - confirm against the callers.
101    sub          %edx, %ecx
102    shl          %cl, %ebx
103    shr          %cl, %ebx
104    mov          (40)(%rsp), %rcx
105    mov          %ebx, %r14d
106    lea          (%rcx), %rbx
107    mov          (%rbx,%rdx,4), %ecx
108    mov          %ecx, %r15d
109    shr          $(16), %ecx
110    jz           LEncHuffExitEndOfWork00gas_1
111    and          mask0F(%rip), %r15
112    sub          %ecx, %ebp
113    sub          %edx, %ebp
114    shl          %cl, %r11
115    mov          %edx, %ecx
116    or           %r15, %r11
117    shl          %cl, %r11
118    or           %r14, %r11
119
120
    // ---- AC part: locate the end of the non-zero coefficients ----
    // %r9 = AC table, %rbx = coefficient pointer, %edi = candidate zigzag
    // index starting at 63.
121    movq         (128)(%rsp), %r9
122    lea          (%r9), %r9
123    xor          %edx, %edx
124    mov          (%rsp), %rbx
125    mov          $(63), %edi
126
127
    // Coarse search.  Each step ORs one 32-bit word (two adjacent 16-bit
    // coefficients) into %edx and stops at the first non-zero result;
    // otherwise the candidate index in %edi is lowered.  The candidate
    // appears to be an upper bound only - the exact index is found by the
    // byte-table driven loop at LEncHuffAcZeros01gas_1.
128    or           (124)(%rbx), %edx
129    jnz          LEncHuffAcZeros00gas_1
130    sub          $(2), %edi
131    or           (108)(%rbx), %edx
132    jnz          LEncHuffAcZeros00gas_1
133    sub          $(1), %edi
134    or           (92)(%rbx), %edx
135    jnz          LEncHuffAcZeros00gas_1
136    sub          $(2), %edi
137    or           (120)(%rbx), %edx
138    jnz          LEncHuffAcZeros00gas_1
139    sub          $(2), %edi
140    or           (104)(%rbx), %edx
141    jnz          LEncHuffAcZeros00gas_1
142    sub          $(2), %edi
143    or           (76)(%rbx), %edx
144    jnz          LEncHuffAcZeros00gas_1
145    sub          $(1), %edi
146    or           (60)(%rbx), %edx
147    jnz          LEncHuffAcZeros00gas_1
148    sub          $(2), %edi
149    or           (88)(%rbx), %edx
150    jnz          LEncHuffAcZeros00gas_1
151    sub          $(2), %edi
152    or           (116)(%rbx), %edx
153    jnz          LEncHuffAcZeros00gas_1
154    sub          $(2), %edi
155    or           (100)(%rbx), %edx
156    jnz          LEncHuffAcZeros00gas_1
157    sub          $(2), %edi
158    or           (72)(%rbx), %edx
159    jnz          LEncHuffAcZeros00gas_1
160    sub          $(2), %edi
161    or           (44)(%rbx), %edx
162    jnz          LEncHuffAcZeros00gas_1
163    sub          $(1), %edi
164    or           (28)(%rbx), %edx
165    jnz          LEncHuffAcZeros00gas_1
166    sub          $(2), %edi
167    or           (56)(%rbx), %edx
168    jnz          LEncHuffAcZeros00gas_1
169    sub          $(2), %edi
170    or           (84)(%rbx), %edx
171    jnz          LEncHuffAcZeros00gas_1
172    sub          $(2), %edi
173    or           (112)(%rbx), %edx
174    jnz          LEncHuffAcZeros00gas_1
175    sub          $(2), %edi
176    or           (96)(%rbx), %edx
177    jnz          LEncHuffAcZeros00gas_1
178    sub          $(2), %edi
179    or           (68)(%rbx), %edx
180    jnz          LEncHuffAcZeros00gas_1
181    sub          $(2), %edi
182    or           (40)(%rbx), %edx
183    jnz          LEncHuffAcZeros00gas_1
184    sub          $(2), %edi
185    or           (12)(%rbx), %edx
186    jnz          LEncHuffAcZeros00gas_1
187    sub          $(2), %edi
188    or           (24)(%rbx), %edx
189    jnz          LEncHuffAcZeros00gas_1
190    sub          $(2), %edi
191    or           (52)(%rbx), %edx
192    jnz          LEncHuffAcZeros00gas_1
193    sub          $(2), %edi
194    or           (80)(%rbx), %edx
195    jnz          LEncHuffAcZeros00gas_1
196    sub          $(3), %edi
197    or           (64)(%rbx), %edx
198    jnz          LEncHuffAcZeros00gas_1
199    sub          $(2), %edi
200    or           (36)(%rbx), %edx
201    jnz          LEncHuffAcZeros00gas_1
202    sub          $(2), %edi
203    or           (8)(%rbx), %edx
204    jnz          LEncHuffAcZeros00gas_1
205    sub          $(2), %edi
206    or           (20)(%rbx), %edx
207    jnz          LEncHuffAcZeros00gas_1
208    sub          $(2), %edi
209    or           (48)(%rbx), %edx
210    jnz          LEncHuffAcZeros00gas_1
211    sub          $(3), %edi
212    or           (32)(%rbx), %edx
213    jnz          LEncHuffAcZeros00gas_1
214    sub          $(2), %edi
215    or           (4)(%rbx), %edx
216    jnz          LEncHuffAcZeros00gas_1
217    sub          $(2), %edi
218    or           (16)(%rbx), %edx
219    jnz          LEncHuffAcZeros00gas_1
220    sub          $(2), %edi
221
222LEncHuffAcZeros00gas_1:
    // Fine search: walk downwards from the candidate index, one coefficient
    // at a time in zigzag order, until a non-zero 16-bit value is found or
    // %edi reaches 0.
223    xor          %edx, %edx
224    movzbl       (%rdi,%rsi), %ecx
225LEncHuffAcZeros01gas_1:
226    orw          (%rbx,%rcx,2), %dx
227    jnz          LEncHuffAcZeros02gas_1
228    movzbl       (-1)(%rdi,%rsi), %ecx
229    sub          $(1), %edi
230    jg           LEncHuffAcZeros01gas_1
231LEncHuffAcZeros02gas_1:
    // Scan limit = min(found index + 1, 63), stored at 56(%rsp).  Then set
    // up the AC loop: %edi = 1, zero run in %r13d = 0, %ecx = natural-order
    // index of zigzag position 1.
232    add          $(1), %edi
233    mov          $(63), %eax
234    cmp          $(63), %edi
235    cmovg        %eax, %edi
236    mov          %edi, (56)(%rsp)
237    mov          $(1), %edi
238    xor          %r13d, %r13d
239    movzbl       (%rdi,%rsi), %ecx
240
241
    // ---- AC loop ----
    // Load the coefficient for the current zigzag position and pre-load the
    // natural-order index of the next position.  A zero coefficient adds 16
    // to %r13d (one more in the run, already in "run << 4" form); running
    // past the scan limit while still in a zero run goes to the
    // LEncHuffAc10gas_1 path.
242LEncHuffAc00gas_1:
243    movswl       (%rbx,%rcx,2), %edx
244    movzbl       (1)(%rdi,%rsi), %ecx
245    test         %edx, %edx
246    jnz          LEncHuffAc01gas_1
247    mov          (56)(%rsp), %edx
248    add          $(16), %r13d
249    add          $(1), %edi
250    cmp          %edx, %edi
251    jle          LEncHuffAc00gas_1
252    jmp          LEncHuffAc10gas_1
253
254
255LEncHuffAc01gas_1:
    // Non-zero coefficient in %edx.  A run of 16 or more zeros
    // (%r13d >= 256) is first reduced by the LEncHuffAc20gas_1 path.
256    cmp          $(256), %r13d
257    jge          LEncHuffAc20gas_1
258LEncHuffAc02gas_1:
    // Same magnitude/size computation as in the DC part:
    //   %edx = |value| -> size category,  %ebx = value - (value < 0 ? 1 : 0)
259    xor          %ecx, %ecx
260    mov          %edx, %ebx
261    neg          %edx
262    setg         %cl
263    cmovl        %ebx, %edx
264    sub          %ecx, %ebx
265    mov          $(32), %ecx
266    cmp          $(256), %edx
267    jge          LEncHuffAcLong00gas_1
268    movzbl       (64)(%rsi,%rdx), %edx
269LEncHuffAc03gas_1:
    // %r14d = low `size` bits of %ebx (extra bits); %r13d |= size gives the
    // AC table index.  The entry is split into code length (%ebx) and code
    // bits (%r15); a zero length takes the failure exit.  If fewer than
    // `length` bits are free in %r11 the writer is invoked first.
270    sub          %edx, %ecx
271    shl          %cl, %ebx
272    or           %edx, %r13d
273    shr          %cl, %ebx
274    mov          %ebx, %r14d
275    mov          (%r9,%r13,4), %ebx
276    mov          %ebx, %r15d
277    shr          $(16), %ebx
278    jz           LEncHuffExitEndOfWork00gas_1
279    and          mask0F(%rip), %r15
280    cmp          %ebx, %ebp
281    jl           LEncHuffCallWrite00gas_1
282LEncHuffRetWrite00gas_1:
    // Append the code bits; %rbx is reloaded with the coefficient pointer
    // because it was used for the table entry.  The writer is invoked again
    // if the extra bits do not fit.
283    mov          %ebx, %ecx
284    sub          %ebx, %ebp
285    mov          (%rsp), %rbx
286    shl          %cl, %r11
287    or           %r15, %r11
288    cmp          %edx, %ebp
289    jl           LEncHuffCallWrite01gas_1
290LEncHuffRetWrite01gas_1:
    // Append the extra bits, advance to the next zigzag position, reset the
    // zero run and continue while the index is within the scan limit.
291    mov          %edx, %ecx
292    add          $(1), %edi
293    shl          %cl, %r11
294    or           %r14, %r11
295    sub          %edx, %ebp
296    mov          (56)(%rsp), %edx
297    xor          %r13d, %r13d
298    movzbl       (%rdi,%rsi), %ecx
299    cmp          %edx, %edi
300    jle          LEncHuffAc00gas_1
301    jmp          LEncHuffExitNormgas_1
302
303
304LEncHuffAc10gas_1:
    // The block ends in a zero run: emit the code stored in AC table entry 0
    // (index 0 = run 0, size 0), then finish.
305    mov          (%r9), %ebx
306    mov          %ebx, %r15d
307    shr          $(16), %ebx
308    jz           LEncHuffExitEndOfWork00gas_1
309    and          mask0F(%rip), %r15
310    cmp          %ebx, %ebp
311    jl           LEncHuffCallWrite03gas_1
312LEncHuffRetWrite03gas_1:
313    mov          %ebx, %ecx
314    sub          %ebx, %ebp
315    shl          %cl, %r11
316    or           %r15, %r11
317    jmp          LEncHuffExitNormgas_1
318
319
320LEncHuffAc20gas_1:
    // Zero run of 16 or more: emit the code stored at byte offset 960 of the
    // AC table (entry 240 = 0xF0, i.e. run 15, size 0) and subtract 256 from
    // %r13d, repeating until the run is below 16.  %edx (the coefficient) is
    // left untouched so LEncHuffAc02gas_1 can resume with it.
321    mov          (960)(%r9), %ebx
322    mov          %ebx, %r15d
323    shr          $(16), %ebx
324    jz           LEncHuffExitEndOfWork00gas_1
325    and          mask0F(%rip), %r15
326    cmp          %ebx, %ebp
327    jl           LEncHuffCallWrite02gas_1
328LEncHuffRetWrite02gas_1:
329    mov          %ebx, %ecx
330    sub          %ebx, %ebp
331    shl          %cl, %r11
332    or           %r15, %r11
333    sub          $(256), %r13d
334    cmp          $(256), %r13d
335    jge          LEncHuffAc20gas_1
336    jmp          LEncHuffAc02gas_1
337
338
    // ---- Writer call stubs ----
    // Each stub stores the address to resume at in 48(%rsp) and enters the
    // writer by jump; the writer returns with an indirect jump through that
    // slot.  The last stub falls through into the writer.
339LEncHuffCallWrite04gas_1:
340    lea          LEncHuffRetWrite04gas_1(%rip), %rax
341    mov          %rax, (48)(%rsp)
342    jmp          LEncHuffWrite00gas_1
343
344LEncHuffCallWrite03gas_1:
345    lea          LEncHuffRetWrite03gas_1(%rip), %rax
346    mov          %rax, (48)(%rsp)
347    jmp          LEncHuffWrite00gas_1
348
349LEncHuffCallWrite02gas_1:
350    lea          LEncHuffRetWrite02gas_1(%rip), %rax
351    mov          %rax, (48)(%rsp)
352    jmp          LEncHuffWrite00gas_1
353
354LEncHuffCallWrite01gas_1:
355    lea          LEncHuffRetWrite01gas_1(%rip), %rax
356    mov          %rax, (48)(%rsp)
357    jmp          LEncHuffWrite00gas_1
358
359LEncHuffCallWrite00gas_1:
360    lea          LEncHuffRetWrite00gas_1(%rip), %rax
361    mov          %rax, (48)(%rsp)
362
    // ---- Internal writer ----
    // Flushes the whole bytes pending in %r11 to the output at %r8 + %r12.
    // Modifies %rax, %rcx, %r10, %xmm1, %ebp, %r12 and the flags; %r11 itself
    // is left as it is (already-written bits stay in its upper part and are
    // shifted out by later left shifts).
    // On entry %ebp = free bits.  %r10 = pending bits left-aligned in 64
    // bits, then byte-swapped so that the first byte to emit is the lowest
    // byte.  A copy taken before the swap goes to %xmm1 for the 0xFF test.
    // Leaves through the failure exit when (position - limit) > -8.
    // With fewer than 32 pending bits the byte-wise path is used directly.
363LEncHuffWrite00gas_1:
364    mov          %ebp, %ecx
365    sub          $(64), %ebp
366    neg          %ebp
367    mov          %r11, %r10
368    shl          %cl, %r10
369    movd         %r10, %xmm1
370    bswap        %r10
371    mov          %r12d, %eax
372    mov          %r12d, %ecx
373    subl         (8)(%rsp), %eax
374    cmp          $(-8), %eax
375    jg           LEncHuffExitEndOfWork00gas_1
376    cmp          $(32), %ebp
377    jl           LEncHuffWrite11gas_1
378
    // Fast path: if none of the eight bytes equals 0xFF, store all eight
    // bytes at once, advance the position by (pending bits / 8) and return
    // with %ebp = 64 - (pending bits mod 8).
379    pcmpeqb      maskFF(%rip), %xmm1
380    movd         %xmm1, %rax
381    test         %rax, %rax
382    jnz          LEncHuffWrite10gas_1
383    mov          %r10, (%r8,%rcx)
384    mov          %ebp, %eax
385    and          $(7), %ebp
386    and          $(4294967288), %eax
387    sub          $(64), %ebp
388    shr          $(3), %eax
389    add          %eax, %r12d
390    mov          (48)(%rsp), %rax
391    neg          %ebp
392    jmp          *%rax
393
394
395LEncHuffWrite10gas_1:
    // Slow path (at least one 0xFF byte present): needs more room, so the
    // failure exit is taken when (position - limit) > -16.  The `mov`
    // between `cmp` and `jg` restores %ecx and does not change the flags.
396    sub          (8)(%rsp), %ecx
397    cmp          $(-16), %ecx
398    mov          %r12d, %ecx
399    jg           LEncHuffExitEndOfWork00gas_1
400LEncHuffWrite11gas_1:
    // Byte-wise loop: while at least 8 bits are pending, emit the next byte
    // and follow every 0xFF byte with an extra 0x00 byte (byte stuffing).
401    sub          $(8), %ebp
402    jl           LEncHuffWrite12gas_1
403    mov          %r10b, %al
404    shr          $(8), %r10
405    mov          %al, (%r8,%rcx)
406    add          $(1), %ecx
407    cmp          $(255), %al
408    jne          LEncHuffWrite11gas_1
409    xor          %eax, %eax
410    mov          %al, (%r8,%rcx)
411    add          $(1), %ecx
412    jmp          LEncHuffWrite11gas_1
413LEncHuffWrite12gas_1:
    // %ebp is (leftover bits - 8) here; convert it back to the free-bit
    // count 64 - leftover, commit the new position and return.
414    sub          $(56), %ebp
415    mov          %rcx, %r12
416    neg          %ebp
417    mov          (48)(%rsp), %rax
418    jmp          *%rax
419
420
    // ---- Failure exits ----
    // ...00: put the previous-DC value saved at 32(%rsp) back, then return 1.
    // ...01: return 1 without touching the previous-DC value.
    // Neither path updates the output position or the bit-buffer state.
421LEncHuffExitEndOfWork00gas_1:
422    mov          (24)(%rsp), %rax
423    mov          (32)(%rsp), %ecx
424    mov          %cx, (%rax)
425LEncHuffExitEndOfWork01gas_1:
426    mov          $(1), %eax
427    jmp          LEncHuffExit00gas_1
428
    // ---- Normal exit ----
    // If 8 or more bits are pending (%ebp <= 56) flush the whole bytes
    // first.  Then store the output position, left-align the remaining bits
    // and shift them right by 40 so they sit in the top of a 24-bit field,
    // and store the pending-bit count (state + 8) and the bits (state + 0).
429LEncHuffExitNormgas_1:
430    cmp          $(56), %ebp
431    jle          LEncHuffCallWrite04gas_1
432LEncHuffRetWrite04gas_1:
433    mov          (16)(%rsp), %rax
434    mov          %r12d, (%rax)
435    mov          %ebp, %ecx
436    sub          $(64), %ebp
437    shl          %cl, %r11
438    neg          %ebp
439    shr          $(40), %r11
440    movq         (136)(%rsp), %rax
441    lea          (8)(%rax), %rcx
442    mov          %ebp, (%rcx)
443    lea          (%rax), %rax
444    mov          %r11d, (%rax)
445    mov          $(0), %eax
    // Common epilogue: release the frame and restore the saved registers in
    // reverse order of the prologue.
446LEncHuffExit00gas_1:
447    add          $(72), %rsp
448
449
450    pop          %r15
451
452
453    pop          %r14
454
455
456    pop          %r13
457
458
459    pop          %r12
460
461
462    pop          %rbx
463
464
465    pop          %rbp
466
467    ret
468
469
    // ---- Out-of-line size lookups for magnitudes >= 256 ----
    // The bit-length table only has 256 entries, so the lookup is done on
    // (magnitude >> 8) and 8 is added to the result.
470LEncHuffDcLong00gas_1:
471    shr          $(8), %edx
472    movzbl       (64)(%rsi,%rdx), %edx
473    add          $(8), %edx
474    jmp          LEncHuffDc00gas_1
475
476LEncHuffAcLong00gas_1:
477    shr          $(8), %edx
478    movzbl       (64)(%rsi,%rdx), %edx
479    add          $(8), %edx
480    jmp          LEncHuffAc03gas_1
481
482
// Constant tables used by mfxownpj_EncodeHuffman8x8_JPEG_16s1u_C1.
// NOTE(review): the code visible in this file only reads these objects, so a
// read-only section may be a better home than .data - confirm that nothing
// else writes to them before moving.
483.data
484
485.p2align 4, 0x90
486
// 16 bytes of 0xFF.  Used as the memory operand of pcmpeqb to detect 0xFF
// bytes in the bit buffer before it is written out; the 16-byte alignment
// above is what allows it to be used as a legacy-SSE memory operand.
487maskFF:
488.quad   0xffffffffffffffff,  0xffffffffffffffff
489
// 64-bit mask that keeps the low 16 bits of a table entry (the code bits).
// Despite the "0F" in the name, the value is 0xffff.
490mask0F:
491.quad               0xffff
492
493
// ownTables + 0  : 64 bytes mapping a zigzag position (0..63) to the index of
//                  the coefficient in the 8x8 block as stored in memory.
// ownTables + 64 : 256 bytes mapping a value v (0..255) to the number of bits
//                  needed to represent it: 0 -> 0, 1 -> 1, 2..3 -> 2,
//                  4..7 -> 3, 8..15 -> 4, ... 128..255 -> 8.
// The code addresses the second table with a fixed displacement of 64 from
// the ownTables label, so the two tables must stay contiguous.
494ownTables:
495.byte   0,  1,  8, 16,  9,  2,  3, 10
496
497
498.byte  17, 24, 32, 25, 18, 11,  4,  5
499
500
501.byte  12, 19, 26, 33, 40, 48, 41, 34
502
503
504.byte  27, 20, 13,  6,  7, 14, 21, 28
505
506
507.byte  35, 42, 49, 56, 57, 50, 43, 36
508
509
510.byte  29, 22, 15, 23, 30, 37, 44, 51
511
512
513.byte  58, 59, 52, 45, 38, 31, 39, 46
514
515
516.byte  53, 60, 61, 54, 47, 55, 62, 63
517
518
519
// Bit-length table, entries 0..7 spelled out, the rest generated with .fill
// (repeat count, element size 1 byte, value).
520.byte  0, 1, 2, 2, 3, 3, 3, 3
521
522
// entries 8..15
523.fill 8, 1, 4
524
525
// entries 16..31
526.fill 16, 1, 5
527
528
// entries 32..63
529.fill 32, 1, 6
530
531
// entries 64..127
532.fill 64, 1, 7
533
534
// entries 128..255
535.fill 128, 1, 8
536
537
538