/*
 * Copyright (C) 2010 Mans Rullgard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
21#include "libavutil/arm/asm.S"
22
/*
 * rac_get_prob h, bs, buf, cw, pr, t0, t1
 *
 * Decode one range-coder bit using the probability held in register pr.
 *
 * In:   h   = high
 *       bs  = bits (bit counter)
 *       buf = read pointer into the input
 *       cw  = code word
 *       pr  = probability, a byte value in a register
 *       t0  = shift to apply to high and code word before decoding
 * Out:  h, bs, buf, cw updated. The flags of the final cmp are left in
 *       place: the caller tests GE for a decoded 1 and LT for a decoded 0.
 * Clobbers: t0 (left holding high << shift), t1, condition flags.
 *
 * Lines starting with the letter T or the letter A are two encodings of the
 * same step; the prefixes are presumably defined in asm.S to select the
 * Thumb or the ARM form -- confirm there.
 */
.macro rac_get_prob     h, bs, buf, cw, pr, t0, t1
        adds            \bs, \bs, \t0                   @ bits += shift, carry out selects the refill path
        lsl             \cw, \cw, \t0                   @ code word <<= shift
        lsl             \t0, \h,  \t0                   @ t0 = high << shift
        rsb             \h,  \pr, #256                  @ h = 256 - pr
        it              cs
        ldrhcs          \t1, [\buf], #2                 @ refill: fetch 16 bits, buf += 2
        smlabb          \h,  \t0, \pr, \h               @ h = t0 * pr + 256 - pr (low halfwords multiplied)
T       itttt           cs
        rev16cs         \t1, \t1                        @ refill: swap the two bytes of the halfword
A       orrcs           \cw, \cw, \t1, lsl \bs          @ refill: code word |= t1 << bits
T       lslcs           \t1, \t1, \bs                   @ same merge with the shift done separately
T       orrcs           \cw, \cw, \t1
        subcs           \bs, \bs, #16                   @ refill: bits -= 16
        lsr             \h,  \h,  #8                    @ h = split = (t0 * pr + 256 - pr) >> 8
        cmp             \cw, \h,  lsl #16               @ code word against split << 16
        itt             ge
        subge           \cw, \cw, \h,  lsl #16          @ bit 1: code word -= split << 16
        subge           \h,  \t0, \h                    @ bit 1: high = t0 - split (bit 0: high = split)
.endm
43
/*
 * rac_get_128 h, bs, buf, cw, t0, t1
 *
 * Decode one range-coder bit with the probability fixed at 128. Same
 * sequence as rac_get_prob, except that the split is computed with a shift
 * and an add instead of a multiply.
 *
 * In:   h = high, bs = bits, buf = read pointer, cw = code word,
 *       t0 = shift to apply before decoding
 * Out:  h, bs, buf, cw updated. The flags of the final cmp are left in
 *       place: GE for a decoded 1, LT for a decoded 0.
 * Clobbers: t0 (left holding high << shift), t1, condition flags.
 *
 * Lines starting with the letter T or the letter A are two encodings of the
 * same step; the prefixes are presumably defined in asm.S to select the
 * Thumb or the ARM form -- confirm there.
 */
.macro rac_get_128      h, bs, buf, cw, t0, t1
        adds            \bs, \bs, \t0                   @ bits += shift, carry out selects the refill path
        lsl             \cw, \cw, \t0                   @ code word <<= shift
        lsl             \t0, \h,  \t0                   @ t0 = high << shift
        it              cs
        ldrhcs          \t1, [\buf], #2                 @ refill: fetch 16 bits, buf += 2
        mov             \h,  #128
        it              cs
        rev16cs         \t1, \t1                        @ refill: swap the two bytes of the halfword
        add             \h,  \h,  \t0, lsl #7           @ h = 128 + t0 * 128
A       orrcs           \cw, \cw, \t1, lsl \bs          @ refill: code word |= t1 << bits
T       ittt            cs
T       lslcs           \t1, \t1, \bs                   @ same merge with the shift done separately
T       orrcs           \cw, \cw, \t1
        subcs           \bs, \bs, #16                   @ refill: bits -= 16
        lsr             \h,  \h,  #8                    @ h = split = (t0 * 128 + 128) >> 8
        cmp             \cw, \h,  lsl #16               @ code word against split << 16
        itt             ge
        subge           \cw, \cw, \h,  lsl #16          @ bit 1: code word -= split << 16
        subge           \h,  \t0, \h                    @ bit 1: high = t0 - split (bit 0: high = split)
.endm
65
/*
 * ff_decode_block_coeffs_armv6
 *
 * Reads coefficient tokens from the range coder one position at a time,
 * multiplies each value larger than one by a 16-bit multiplier (a value of
 * one is stored as plus or minus the multiplier itself) and stores the
 * result as a halfword into the block at the byte offset taken from
 * zigzag_scan. Stops when the continue bit decodes as 0 or when the index
 * reaches 16.
 *
 * In:
 *   r0       range coder state: high, bits, buf at offsets 0/4/8, a limit
 *            for buf at offset 12, code_word at offset 16
 *   r1       destination block
 *   r2       probability table base; the set used for the next token is at
 *            r2 + 33 * index + {0, 11, 22}, selected by the value just
 *            decoded (zero, one, larger)
 *   r3       index of the first position to decode
 *   stack    token_prob (probability set for the first token), then qmul
 *            (pointer to two adjacent 16-bit multipliers)
 * Out:
 *   r0       index after the last position processed
 *
 * Register use inside the loop:
 *   r0  probability for the next bit      r9   shift / scratch
 *   r3  index                             r10  scratch
 *   r4  current probability set           r11  multiplier pair
 *   r5  high      r6  bits                r12  decoded value
 *   r7  buf       r8  code_word           lr   ff_vp56_norm_shift
 *
 * The flags left by rac_get_prob / rac_get_128 are often consumed several
 * instructions later, so only instructions that leave the flags untouched
 * may sit in between. The first movrel below sits between a cmp and its
 * conditional users and is relied on not to touch the flags; movrel and
 * movrelx come from asm.S -- confirm there before reordering anything.
 */
function ff_decode_block_coeffs_armv6, export=1
        push            {r0,r1,r4-r11,lr}               @ 11 words; r0 and r1 are reloaded from [sp] and [sp, #4]
        movrelx         lr,  X(ff_vp56_norm_shift)      @ table indexed by high to get the shift
        ldrd            r4,  r5,  [sp, #44]             @ token_prob, qmul
        cmp             r3,  #0
        ldr             r11, [r5]                       @ both 16-bit multipliers in one word
        ldm             r0,  {r5-r7}                    @ high, bits, buf
        it              ne
        pkhtbne         r11, r11, r11, asr #16          @ start index != 0: upper multiplier copied to the low half
        ldr             r8,  [r0, #16]                  @ code_word

        @ Next position, r4 = its probability set. Entry starts here, so the
        @ first token is decoded without the continue test at label 5;
        @ presumably the caller has already made that test.
0:
        ldrb            r9,  [lr, r5]                   @ shift for the current high
        add             r3,  r3,  #1                    @ index now counts this position
        ldrb            r0,  [r4, #1]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        blt             2f                              @ bit 0: coefficient is zero

        ldrb            r9,  [lr, r5]
        ldrb            r0,  [r4, #2]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r9,  [lr, r5]                   @ shift for the following read, ldrb keeps the flags
        bge             3f                              @ bit 1: magnitude is 2 or more

        @ Magnitude 1. The sign read is the rac_get_128 sequence written out
        @ with unrelated instructions placed between its steps.
        add             r4,  r3,  r3,  lsl #5           @ r4 = 33 * index
        sxth            r12, r11                        @ value = low multiplier, sign-extended
        add             r4,  r4,  r2
        adds            r6,  r6,  r9                    @ bits += shift, carry out selects the refill path
        add             r4,  r4,  #11                   @ r4 = r2 + 33 * index + 11
        lsl             r8,  r8,  r9
        it              cs
        ldrhcs          r10, [r7], #2
        lsl             r9,  r5,  r9                    @ r9 = high << shift
        mov             r5,  #128
        it              cs
        rev16cs         r10, r10
        add             r5,  r5,  r9,  lsl #7
T       ittt            cs
T       lslcs           r10, r10, r6
T       orrcs           r8,  r8,  r10
A       orrcs           r8,  r8,  r10, lsl r6
        subcs           r6,  r6,  #16
        lsr             r5,  r5,  #8                    @ r5 = split
        cmp             r8,  r5,  lsl #16               @ sign bit: GE = 1
        movrel          r10, zigzag_scan-1              @ table base biased by -1, index is already incremented
        itt             ge
        subge           r8,  r8,  r5,  lsl #16
        subge           r5,  r9,  r5
        ldrb            r10, [r10, r3]                  @ byte offset of this position in the block
        it              ge
        rsbge           r12, r12, #0                    @ sign bit 1: negate
        cmp             r3,  #16
        strh            r12, [r1, r10]
        bge             6f                              @ index reached 16: finish

        @ Continue test: one bit read with byte 0 of the probability set.
5:
        ldrb            r9,  [lr, r5]
        ldrb            r0,  [r4]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        pkhtb           r11, r11, r11, asr #16          @ from now on both halves hold the upper multiplier
        bge             0b                              @ bit 1: decode another position

        @ Finish: write the range coder state back and return the index.
6:
        ldr             r0,  [sp]                       @ saved range coder pointer
        ldr             r9,  [r0, #12]
        cmp             r7,  r9
        it              hi
        movhi           r7,  r9                         @ buf is not stored beyond the limit at offset 12
        stm             r0,  {r5-r7}                    @ high, bits, buf
        str             r8,  [r0, #16]                  @ code_word

        add             sp,  sp,  #8                    @ drop the saved r0 and r1
        mov             r0,  r3
        pop             {r4-r11,pc}

        @ Zero coefficient: nothing is stored, and the continue test at
        @ label 5 is skipped for the next position.
2:
        add             r4,  r3,  r3,  lsl #5
        cmp             r3,  #16
        add             r4,  r4,  r2                    @ r4 = r2 + 33 * index
        pkhtb           r11, r11, r11, asr #16
        bne             0b
        b               6b

        @ Magnitude 2 or more.
3:
        ldrb            r0,  [r4, #3]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r9,  [lr, r5]
        bge             1f                              @ bit 1: magnitude is 5 or more

        mov             r12, #2
        ldrb            r0,  [r4, #4]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        it              ge
        addge           r12, #1
        ldrb            r9,  [lr, r5]
        blt             4f                              @ bit 0: value is 2
        ldrb            r0,  [r4, #5]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        it              ge
        addge           r12, #1                         @ value is 3 or 4
        ldrb            r9,  [lr, r5]
        b               4f

        @ Magnitude 5 or more.
1:
        ldrb            r0,  [r4, #6]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r9,  [lr, r5]
        bge             3f                              @ bit 1: 11 or more, extra bits come from a table

        ldrb            r0,  [r4, #7]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r9,  [lr, r5]
        bge             2f                              @ bit 1: value is 7..10

        mov             r12, #5
        mov             r0,  #159                       @ fixed probability for the extra bit
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        it              ge
        addge           r12, r12, #1                    @ value is 5 or 6
        ldrb            r9,  [lr, r5]
        b               4f

        @ Value 7..10: base 7 plus two extra bits with fixed probabilities.
2:
        mov             r12, #7
        mov             r0,  #165
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        it              ge
        addge           r12, r12, #2
        ldrb            r9,  [lr, r5]
        mov             r0,  #145
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        it              ge
        addge           r12, r12, #1
        ldrb            r9,  [lr, r5]
        b               4f

        @ Value 11 or more: two bits pick one of four entries in
        @ ff_vp8_dct_cat_prob, then one extra bit is read per probability
        @ in that entry.
3:
        ldrb            r0,  [r4, #8]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        it              ge
        addge           r4,  r4,  #1                    @ bit 1: second bit uses byte 10 instead of byte 9
        ldrb            r9,  [lr, r5]
        ite             ge
        movge           r12, #2
        movlt           r12, #0
        ldrb            r0,  [r4, #9]
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        mov             r9,  #8
        it              ge
        addge           r12, r12, #1                    @ r12 = selector 0..3
        movrelx         r4,  X(ff_vp8_dct_cat_prob), r1 @ r1 passed as third operand, presumably scratch
        lsl             r9,  r9,  r12
        ldr             r4,  [r4, r12, lsl #2]          @ r4 = selected list of probability bytes
        add             r12, r9,  #3                    @ base value = (8 << selector) + 3
        mov             r1,  #0                         @ r1 collects the extra bits
        ldrb            r0,  [r4], #1
1:
        ldrb            r9,  [lr, r5]
        lsl             r1,  r1,  #1
        rac_get_prob    r5,  r6,  r7,  r8,  r0,  r9,  r10
        ldrb            r0,  [r4], #1                   @ next probability, ldrb keeps the flags
        it              ge
        addge           r1,  r1,  #1
        cmp             r0,  #0                         @ a zero byte ends the list
        bne             1b
        ldrb            r9,  [lr, r5]
        add             r12, r12, r1
        ldr             r1,  [sp, #4]                   @ restore the block pointer saved by the push

        @ Common tail for magnitudes of 2 or more: sign, scale, store.
4:
        add             r4,  r3,  r3,  lsl #5
        add             r4,  r4,  r2
        add             r4,  r4,  #22                   @ r4 = r2 + 33 * index + 22
        rac_get_128     r5,  r6,  r7,  r8,  r9,  r10
        it              ge
        rsbge           r12, r12, #0                    @ sign bit 1: negate
        smulbb          r12, r12, r11                   @ value times the low half of the multiplier pair
        movrel          r9,  zigzag_scan-1
        ldrb            r9,  [r9, r3]                   @ byte offset of this position in the block
        cmp             r3,  #16
        strh            r12, [r1, r9]
        bge             6b                              @ index reached 16: finish
        b               5b
endfunc
242
/*
 * Byte offsets into the coefficient block, one entry per scan position.
 * ff_decode_block_coeffs_armv6 addresses the table as zigzag_scan - 1 plus
 * its already incremented index and uses the byte it loads as the offset of
 * a halfword store relative to the block pointer.
 */
const zigzag_scan
        .byte            0,  2,  8, 16
        .byte           10,  4,  6, 12
        .byte           18, 24, 26, 20
        .byte           14, 22, 28, 30
endconst
249