1/*
2 * Copyright (c) 2014 RISC OS Open Ltd
3 * Author: Ben Avison <bavison@riscosopen.org>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22#include "libavutil/arm/asm.S"
23
// loadregoffsh2 group, index, base, offgroup, offindex
// Emits a word load of register <group><index> from the address
// base + (<offgroup><offindex> << 2).
// index and offindex may be assemble-time expressions: altmacro mode is
// switched on just long enough for %(...) to evaluate them to plain numbers,
// which loadregoffsh2_ then pastes onto the group and offgroup name prefixes.
24.macro loadregoffsh2  group, index, base, offgroup, offindex
25       .altmacro
26       loadregoffsh2_ \group, %(\index), \base, \offgroup, %(\offindex)
27       .noaltmacro
28.endm
29
// Worker for loadregoffsh2. index and offindex are pasted directly onto
// group and offgroup, so they must already be literal numbers that complete
// a register name. Loads one word from base + (offset register << 2).
30.macro loadregoffsh2_ group, index, base, offgroup, offindex
31        ldr     \group\index, [\base, \offgroup\offindex, lsl #2]
32.endm
33
// eorlslreg check, data, group, index
// XORs data, shifted left by the amount held in register <group><index>,
// into check.
// index may be an assemble-time expression; it is evaluated to a plain number
// with the altmacro %(...) operator before eorlslreg_ pastes it onto group.
34.macro eorlslreg  check, data, group, index
35        .altmacro
36        eorlslreg_ \check, \data, \group, %(\index)
37        .noaltmacro
38.endm
39
// Worker for eorlslreg. index is pasted directly onto group to form the name
// of the register that supplies the shift amount, so it must already be a
// literal number. Emits a single EOR with a register-specified left shift.
40.macro eorlslreg_ check, data, group, index
41        eor     \check, \check, \data, lsl \group\index
42.endm
43
// decr_modulo var, by, modulus
// Assemble-time counter update; emits no instructions.
// Subtracts by from the symbol var and, if the result is exactly zero, resets
// var to modulus. Only an exact zero wraps: a result that steps past zero is
// left as it is.
44.macro decr_modulo var, by, modulus
45 .set \var, \var - \by
46 .if \var == 0
47  .set \var, \modulus
48 .endif
49.endm
50
 // load_group1 size, channels, r0, r1, r2, r3, pointer_dead=0
 // Emits the loads for the first group of words of one output4words step and
 // then reduces the assemble-time counter IDX1 by size (wrapping to channels).
 // The register alias IN and the symbol IDX1 must be defined by the caller.
 //   size          2 loads r0 and r1 only; any other value takes the 4-word
 //                 path and loads r0-r3
 //   channels      channel count, used to work out how far IN must advance
 //   r0-r3         destination registers
 //   pointer_dead  nonzero drops the separate IN update that follows the
 //                 non-writeback LDM
 // NOTE(review): the constant 8 in the pointer steps looks like the length in
 // words of one input row, so that 8 - channels unused words are skipped
 // after the last channel - confirm against the C caller.
51 .macro load_group1  size, channels, r0, r1, r2, r3, pointer_dead=0
52  .if \size == 2
  // two words, then step IN by size + (8 - channels) words
53        ldrd    \r0, \r1, [IN], #(\size + 8 - \channels) * 4
54  .else // size == 4
55   .if IDX1 > 4 || \channels==8
   // writeback advances IN by exactly 4 words; for channels == 8 that equals
   // the (4 + 8 - channels) word step used in the other branch
56        ldm     IN!, {\r0, \r1, \r2, \r3}
57   .else
   // no writeback: IN is stepped by 4 + (8 - channels) words separately,
   // unless the caller says the pointer is no longer needed
58        ldm     IN, {\r0, \r1, \r2, \r3}
59    .if !\pointer_dead
60        add     IN, IN, #(4 + 8 - \channels) * 4
61     .endif
62   .endif
63  .endif
64        decr_modulo IDX1, \size, \channels
65 .endm
66
 // load_group2 size, channels, r0, r1, r2, r3, pointer_dead=0
 // Emits the loads for the second group of words of one output4words step,
 // which always land in r2 and r3, and then reduces IDX1 by size.
 // The register alias IN and the symbol IDX1 must be defined by the caller.
 //   size          2 loads two words; for any other value no instruction is
 //                 emitted, but decr_modulo still runs (with size 0 it leaves
 //                 a nonzero IDX1 unchanged)
 //   channels      channel count, used to work out how far IN must advance
 //   r0, r1        accepted for symmetry with load_group1; not used here
 //   r2, r3        destination registers
 //   pointer_dead  only referenced by the disabled //A lines, so it currently
 //                 has no effect
 // The lines marked //A are a commented-out alternative path and are not
 // assembled.
 // NOTE(review): that path appears to cater for r2 being ip, where an LDRD
 // register pair may not be usable - confirm before re-enabling.
67 .macro load_group2  size, channels, r0, r1, r2, r3, pointer_dead=0
68  .if \size == 2
69   .if IDX1 > 2
   // plain writeback: IN advances by exactly the two words loaded
70        ldm     IN!, {\r2, \r3}
71   .else
72//A   .ifc \r2, ip
73//A    .if \pointer_dead
74//A       ldm     IN, {\r2, \r3}
75//A    .else
76//A       ldr     \r2, [IN], #4
77//A       ldr     \r3, [IN], #(\size - 1 + 8 - \channels) * 4
78//A    .endif
79//A   .else
   // two words, then step IN by size + (8 - channels) words
80        ldrd    \r2, \r3, [IN], #(\size + 8 - \channels) * 4
81//A   .endif
82   .endif
83  .endif
84        decr_modulo IDX1, \size, \channels
85 .endm
86
// implement_pack inorder, channels, shift
// Emits one exported function specialised for the given combination.
//   inorder   nonzero: words are read from IN in storage order
//             (ff_mlp_pack_output_inorder_*); zero: each word is read through
//             a channel index taken from ch_assign
//             (ff_mlp_pack_output_outoforder_*)
//   channels  channel count; instantiated at the end of the file with 2, 6, 8
//   shift     a constant left shift (instantiated with 0..5), or the word
//             mixed for per-channel shifts read from output_shift
// Nothing is emitted for out-of-order with mixed shifts, and the out-of-order
// functions are only assembled when CONFIG_THUMB is 0.
// Every branch creates its register aliases and its own output4words macro,
// and removes them again (.unreq / .purgem) so the next expansion can
// redefine them.
// In all variants CHECK lives in a1 on entry, has shifted copies of every
// output word XORed into it, and is still in a1 when the function returns.
87.macro implement_pack  inorder, channels, shift
88.if \inorder
89.ifc \shift, mixed
90
// ---- in-order channels, per-channel (mixed) shifts ----
// a1-a4 are used in place as CHECK, COUNT, IN and OUT. DAT0-3 hold the four
// words being processed; SHIFT0-5 hold shift amounts (see the prologue).
91CHECK   .req    a1
92COUNT   .req    a2
93IN      .req    a3
94OUT     .req    a4
95DAT0    .req    v1
96DAT1    .req    v2
97DAT2    .req    v3
98DAT3    .req    v4
99SHIFT0  .req    v5
100SHIFT1  .req    v6
101SHIFT2  .req    sl
102SHIFT3  .req    fp
103SHIFT4  .req    ip
104SHIFT5  .req    lr
105
 // output4words: load the next 4 words into DAT0-DAT3, shift each left by the
 // shift belonging to its channel, XOR them into CHECK and store them to OUT.
 // IDX1 (consumed by load_group1/2) and IDX2 are assemble-time counters that
 // start at channels and count down as the channels of the current sample are
 // loaded / processed, so channels - IDX2 is the channel index held in DAT0.
106 .macro output4words
107  .set SIZE_GROUP1, IDX1
108  .if SIZE_GROUP1 > 4
109   .set SIZE_GROUP1, 4
110  .endif
111  .set SIZE_GROUP2, 4 - SIZE_GROUP1
112        load_group1  SIZE_GROUP1, \channels, DAT0, DAT1, DAT2, DAT3
113        load_group2  SIZE_GROUP2, \channels, DAT0, DAT1, DAT2, DAT3
114   .if \channels == 2
   // two samples per step: channel 0, 1, 0, 1
115        lsl     DAT0, SHIFT0
116        lsl     DAT1, SHIFT1
117        lsl     DAT2, SHIFT0
118        lsl     DAT3, SHIFT1
119   .elseif \channels == 6
   // the four words start at channel 0, 4 or 2 depending on IDX2
120    .if IDX2 == 6
121        lsl     DAT0, SHIFT0
122        lsl     DAT1, SHIFT1
123        lsl     DAT2, SHIFT2
124        lsl     DAT3, SHIFT3
125    .elseif IDX2 == 2
126        lsl     DAT0, SHIFT4
127        lsl     DAT1, SHIFT5
128        lsl     DAT2, SHIFT0
129        lsl     DAT3, SHIFT1
130    .else // IDX2 == 4
131        lsl     DAT0, SHIFT2
132        lsl     DAT1, SHIFT3
133        lsl     DAT2, SHIFT4
134        lsl     DAT3, SHIFT5
135    .endif
136   .elseif \channels == 8
   // SHIFT4 / SHIFT5 still hold four packed byte shifts each (channels 0-3
   // and 4-7), so unpack the half needed for this step into SHIFT0-3
137    .if IDX2 == 8
138        uxtb    SHIFT0, SHIFT4, ror #0
139        uxtb    SHIFT1, SHIFT4, ror #8
140        uxtb    SHIFT2, SHIFT4, ror #16
141        uxtb    SHIFT3, SHIFT4, ror #24
142    .else
143        uxtb    SHIFT0, SHIFT5, ror #0
144        uxtb    SHIFT1, SHIFT5, ror #8
145        uxtb    SHIFT2, SHIFT5, ror #16
146        uxtb    SHIFT3, SHIFT5, ror #24
147    .endif
148        lsl     DAT0, SHIFT0
149        lsl     DAT1, SHIFT1
150        lsl     DAT2, SHIFT2
151        lsl     DAT3, SHIFT3
152   .endif
   // the word for channel c is folded into CHECK shifted right by 8 - c
   // (the shifts applied above already include the extra 8 from the prologue)
153        eor     CHECK, CHECK, DAT0, lsr #8 - (\channels - IDX2)
154        eor     CHECK, CHECK, DAT1, lsr #7 - (\channels - IDX2)
155   decr_modulo IDX2, 2, \channels
156        eor     CHECK, CHECK, DAT2, lsr #8 - (\channels - IDX2)
157        eor     CHECK, CHECK, DAT3, lsr #7 - (\channels - IDX2)
158   decr_modulo IDX2, 2, \channels
159        stm     OUT!, {DAT0 - DAT3}
160 .endm
161
 // One pass of the loop must cover a whole number of samples and a whole
 // number of 4-word steps: up to two factors of 2 are divided out of channels
 // and the result is multiplied by 4.
162 .set WORDS_PER_LOOP, \channels  // calculate LCM (channels, 4)
163 .if (WORDS_PER_LOOP % 2) == 0
164  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
165 .endif
166 .if (WORDS_PER_LOOP % 2) == 0
167  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
168 .endif
169 .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4
170 .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels
171
172function ff_mlp_pack_output_inorder_\channels\()ch_mixedshift_armv6, export=1
173 .if SAMPLES_PER_LOOP > 1
 // SAMPLES_PER_LOOP - 1 is used as a bit mask, which relies on
 // SAMPLES_PER_LOOP being a power of two (2, 2, 1 for 2, 6, 8 channels).
 // Nothing has been pushed or modified yet, so the branch is a tail call
 // with the original arguments.
174        tst     COUNT, #SAMPLES_PER_LOOP - 1  // always seems to be in practice
175        it      ne
176        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
177 .endif
 // zero samples: return straight away, CHECK unchanged
178        teq     COUNT, #0
179        it      eq
180        bxeq    lr
 // 9 registers are pushed, so the stack arguments now start at sp + 9*4
181        push    {v1-v6,sl,fp,lr}
182        ldr     SHIFT0, [sp, #(9+1)*4]  // get output_shift from stack
183        ldr     SHIFT1, =0x08080808
 // output_shift is read as packed bytes, one per channel: 4 in SHIFT4 and,
 // for more than 2 channels, the next 4 in SHIFT5
184        ldr     SHIFT4, [SHIFT0]
185 .if \channels == 2
186        uadd8   SHIFT4, SHIFT4, SHIFT1 // increase all shifts by 8
187        uxtb    SHIFT0, SHIFT4, ror #0
188        uxtb    SHIFT1, SHIFT4, ror #8
189 .else
190        ldr     SHIFT5, [SHIFT0, #4]
191        uadd8   SHIFT4, SHIFT4, SHIFT1 // increase all shifts by 8
192        uadd8   SHIFT5, SHIFT5, SHIFT1
193  .if \channels == 6
  // six shifts fit in SHIFT0-5, so unpack them once here; SHIFT4 and SHIFT5
  // are overwritten last because they are also the packed sources.
  // For 8 channels the words stay packed and output4words unpacks per step.
194        uxtb    SHIFT0, SHIFT4, ror #0
195        uxtb    SHIFT1, SHIFT4, ror #8
196        uxtb    SHIFT2, SHIFT4, ror #16
197        uxtb    SHIFT3, SHIFT4, ror #24
198        uxtb    SHIFT4, SHIFT5, ror #0
199        uxtb    SHIFT5, SHIFT5, ror #8
200  .endif
201 .endif
202 .set IDX1, \channels
203 .set IDX2, \channels
2040:
205 .rept WORDS_PER_LOOP / 4
206        output4words
207 .endr
208        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
209        bne     0b
210        pop     {v1-v6,sl,fp,pc}
 // literal pool for the =0x08080808 load above
211        .ltorg
212endfunc
213 .purgem output4words
214
215        .unreq  CHECK
216        .unreq  COUNT
217        .unreq  IN
218        .unreq  OUT
219        .unreq  DAT0
220        .unreq  DAT1
221        .unreq  DAT2
222        .unreq  DAT3
223        .unreq  SHIFT0
224        .unreq  SHIFT1
225        .unreq  SHIFT2
226        .unreq  SHIFT3
227        .unreq  SHIFT4
228        .unreq  SHIFT5
229
230.else // not mixed
231
// ---- in-order channels, constant shift ----
// With no shift registers needed, eight data registers are available, which
// lets the loads for one group of 4 words be interleaved with the processing
// of the previous group.
232CHECK   .req    a1
233COUNT   .req    a2
234IN      .req    a3
235OUT     .req    a4
236DAT0    .req    v1
237DAT1    .req    v2
238DAT2    .req    v3
239DAT3    .req    v4
240DAT4    .req    v5
241DAT5    .req    v6
242DAT6    .req    sl // use these rather than the otherwise unused
243DAT7    .req    fp // ip and lr so that we can load them using LDRD
244
 // output4words tail, head, r0..r7, pointer_dead=0
 //   head  nonzero: load the next 4 words into r0-r3 and shift them left by
 //         8 + shift
 //   tail  nonzero: XOR the 4 words already sitting in r4-r7 (loaded and
 //         shifted by an earlier head) into CHECK and store them to OUT
 //   pointer_dead  forwarded to load_group1/2
 // The head and tail pieces are emitted alternately so that each load is
 // separated from the instructions that use its result.
245 .macro output4words  tail, head, r0, r1, r2, r3, r4, r5, r6, r7, pointer_dead=0
246  .if \head
247   .set SIZE_GROUP1, IDX1
248   .if SIZE_GROUP1 > 4
249    .set SIZE_GROUP1, 4
250   .endif
251   .set SIZE_GROUP2, 4 - SIZE_GROUP1
252        load_group1  SIZE_GROUP1, \channels, \r0, \r1, \r2, \r3, \pointer_dead
253  .endif
254  .if \tail
  // the word for channel c is folded into CHECK shifted right by 8 - c,
  // where channels - IDX2 is the channel index of r4
255        eor     CHECK, CHECK, \r4, lsr #8 - (\channels - IDX2)
256        eor     CHECK, CHECK, \r5, lsr #7 - (\channels - IDX2)
257   decr_modulo IDX2, 2, \channels
258  .endif
259  .if \head
260        load_group2  SIZE_GROUP2, \channels, \r0, \r1, \r2, \r3, \pointer_dead
261  .endif
262  .if \tail
263        eor     CHECK, CHECK, \r6, lsr #8 - (\channels - IDX2)
264        eor     CHECK, CHECK, \r7, lsr #7 - (\channels - IDX2)
265   decr_modulo IDX2, 2, \channels
266        stm     OUT!, {\r4, \r5, \r6, \r7}
267  .endif
268  .if \head
269        lsl     \r0, #8 + \shift
270        lsl     \r1, #8 + \shift
271        lsl     \r2, #8 + \shift
272        lsl     \r3, #8 + \shift
273  .endif
274 .endm
275
 // Each pass alternates between the two register banks, so it must cover a
 // whole number of samples and a whole number of 8-word pairs: up to three
 // factors of 2 are divided out of channels and the result multiplied by 8.
276 .set WORDS_PER_LOOP, \channels  // calculate LCM (channels, 8)
277 .if (WORDS_PER_LOOP % 2) == 0
278  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
279 .endif
280 .if (WORDS_PER_LOOP % 2) == 0
281  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
282 .endif
283 .if (WORDS_PER_LOOP % 2) == 0
284  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
285 .endif
286 .set WORDS_PER_LOOP, WORDS_PER_LOOP * 8
287 .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels
288
289function ff_mlp_pack_output_inorder_\channels\()ch_\shift\()shift_armv6, export=1
290 .if SAMPLES_PER_LOOP > 1
 // mask test relies on SAMPLES_PER_LOOP being a power of two (4, 4, 1 for
 // 2, 6, 8 channels); the branch is a tail call with the original arguments
291        tst     COUNT, #SAMPLES_PER_LOOP - 1  // always seems to be in practice
292        it      ne
293        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
294 .endif
 // COUNT is kept one pass short from here on, because the last pass is
 // handled by the separate code at label 1. An unsigned borrow (lo) means
 // there was less than one pass of samples (including zero): return with
 // CHECK unchanged.
295        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
296        it      lo
297        bxlo    lr
298        push    {v1-v6,sl,fp,lr}
299 .set IDX1, \channels
300 .set IDX2, \channels
 // head only: fetch and shift the first 4 words (sets no flags)
301        output4words  0, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
 // Z still comes from the most recent subs: on first entry it is set when
 // only the final pass remains; when reached from the bne below it is clear
3020:      beq     1f
303 .rept WORDS_PER_LOOP / 8
304        output4words  1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
305        output4words  1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
306 .endr
307        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
308        bne     0b
 // final pass: as the loop body, except that the last head may leave IN
 // stale (pointer_dead=1) and the very last step is tail only, so no words
 // beyond the requested samples are loaded
3091:
310 .rept WORDS_PER_LOOP / 8 - 1
311        output4words  1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
312        output4words  1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
313 .endr
314        output4words  1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3, pointer_dead=1
315        output4words  1, 0, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
316        pop     {v1-v6,sl,fp,pc}
317endfunc
318 .purgem output4words
319
320        .unreq  CHECK
321        .unreq  COUNT
322        .unreq  IN
323        .unreq  OUT
324        .unreq  DAT0
325        .unreq  DAT1
326        .unreq  DAT2
327        .unreq  DAT3
328        .unreq  DAT4
329        .unreq  DAT5
330        .unreq  DAT6
331        .unreq  DAT7
332
333.endif // mixed
334.else // not inorder
335.ifc \shift, mixed
336
337// This case is not currently handled: nothing is emitted for out-of-order channels with mixed shifts
338
339.else // not mixed
340
// NOTE(review): presumably excluded for Thumb builds because the code below
// uses register-specified shifts inside EOR operands (lsr CHANn and
// eorlslreg) - confirm.
341#if !CONFIG_THUMB
342
// ---- out-of-order channels, constant shift ----
// CHAN0-5 hold channel indices taken from ch_assign (see the prologue).
343CHECK   .req    a1
344COUNT   .req    a2
345IN      .req    a3
346OUT     .req    a4
347DAT0    .req    v1
348DAT1    .req    v2
349DAT2    .req    v3
350DAT3    .req    v4
351CHAN0   .req    v5
352CHAN1   .req    v6
353CHAN2   .req    sl
354CHAN3   .req    fp
355CHAN4   .req    ip
356CHAN5   .req    lr
357
 // output4words: produce the next 4 output words. Each one is loaded from
 // IN + 4 * (channel index), shifted, XORed into CHECK and stored to OUT.
 // IN is advanced by 8 words once all channels of a sample have been read.
 // IDX1 / IDX2 are assemble-time counters that start at channels and count
 // down as output channels of the current sample are loaded / folded.
358 .macro output4words
359  .if \channels == 8
  // CHAN4 / CHAN5 hold four packed byte indices each; unpack the half
  // needed for this step (outputs 0-3 when IDX1 is 8, otherwise 4-7)
360   .if IDX1 == 8
361        uxtb    CHAN0, CHAN4, ror #0
362        uxtb    CHAN1, CHAN4, ror #8
363        uxtb    CHAN2, CHAN4, ror #16
364        uxtb    CHAN3, CHAN4, ror #24
365   .else
366        uxtb    CHAN0, CHAN5, ror #0
367        uxtb    CHAN1, CHAN5, ror #8
368        uxtb    CHAN2, CHAN5, ror #16
369        uxtb    CHAN3, CHAN5, ror #24
370   .endif
371        ldr     DAT0, [IN, CHAN0, lsl #2]
372        ldr     DAT1, [IN, CHAN1, lsl #2]
373        ldr     DAT2, [IN, CHAN2, lsl #2]
374        ldr     DAT3, [IN, CHAN3, lsl #2]
375   .if IDX1 == 4
   // second half of the sample done: move on to the next input row
376        add     IN, IN, #8*4
377   .endif
378        decr_modulo IDX1, 4, \channels
379  .else
  // 2 or 6 channels: the indices are already unpacked in CHAN0..CHANn, and
  // channels - IDX1 selects which of them belongs to the next output word
380   .set SIZE_GROUP1, IDX1
381   .if SIZE_GROUP1 > 4
382    .set SIZE_GROUP1, 4
383   .endif
384   .set SIZE_GROUP2, 4 - SIZE_GROUP1
385   .if SIZE_GROUP1 == 2
   // only two channels of this sample are left, so IN always advances
386        loadregoffsh2  DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
387        loadregoffsh2  DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
388        add     IN, IN, #8*4
389   .else // SIZE_GROUP1 == 4
390        loadregoffsh2  DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
391        loadregoffsh2  DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
392        loadregoffsh2  DAT, 2, IN, CHAN, 2 + (\channels - IDX1)
393        loadregoffsh2  DAT, 3, IN, CHAN, 3 + (\channels - IDX1)
394    .if IDX1 == 4
395        add     IN, IN, #8*4
396    .endif
397   .endif
398        decr_modulo IDX1, SIZE_GROUP1, \channels
399   .if SIZE_GROUP2 == 2
   // remaining two words come from the start of the next sample
400        loadregoffsh2  DAT, 2, IN, CHAN, 0 + (\channels - IDX1)
401        loadregoffsh2  DAT, 3, IN, CHAN, 1 + (\channels - IDX1)
402    .if IDX1 == 2
403        add     IN, IN, #8*4
404    .endif
405   .endif
   // with SIZE_GROUP2 == 0 this leaves IDX1 as it is
406        decr_modulo IDX1, SIZE_GROUP2, \channels
407  .endif
408  .if \channels == 8 // in this case we can corrupt CHAN0-3
  // CHANn becomes 8 - index; the word is shifted left by 8 + shift and then
  // folded into CHECK shifted right by 8 - index
409        rsb     CHAN0, CHAN0, #8
410        rsb     CHAN1, CHAN1, #8
411        rsb     CHAN2, CHAN2, #8
412        rsb     CHAN3, CHAN3, #8
413        lsl     DAT0, #8 + \shift
414        lsl     DAT1, #8 + \shift
415        lsl     DAT2, #8 + \shift
416        lsl     DAT3, #8 + \shift
417        eor     CHECK, CHECK, DAT0, lsr CHAN0
418        eor     CHECK, CHECK, DAT1, lsr CHAN1
419        eor     CHECK, CHECK, DAT2, lsr CHAN2
420        eor     CHECK, CHECK, DAT3, lsr CHAN3
421  .else
  // CHANn must be preserved here, so instead: apply the constant shift,
  // clear the top byte, fold into CHECK shifted left by the channel index,
  // and only then move the word up by 8 bits for storing
422   .if \shift != 0
423        lsl     DAT0, #\shift
424        lsl     DAT1, #\shift
425        lsl     DAT2, #\shift
426        lsl     DAT3, #\shift
427   .endif
428        bic     DAT0, DAT0, #0xff000000
429        bic     DAT1, DAT1, #0xff000000
430        bic     DAT2, DAT2, #0xff000000
431        bic     DAT3, DAT3, #0xff000000
432        eorlslreg CHECK, DAT0, CHAN, 0 + (\channels - IDX2)
433        eorlslreg CHECK, DAT1, CHAN, 1 + (\channels - IDX2)
434   decr_modulo IDX2, 2, \channels
435        eorlslreg CHECK, DAT2, CHAN, 0 + (\channels - IDX2)
436        eorlslreg CHECK, DAT3, CHAN, 1 + (\channels - IDX2)
437   decr_modulo IDX2, 2, \channels
438        lsl     DAT0, #8
439        lsl     DAT1, #8
440        lsl     DAT2, #8
441        lsl     DAT3, #8
442  .endif
443        stm     OUT!, {DAT0 - DAT3}
444 .endm
445
 // One pass of the loop must cover a whole number of samples and a whole
 // number of 4-word steps.
446 .set WORDS_PER_LOOP, \channels  // calculate LCM (channels, 4)
447 .if (WORDS_PER_LOOP % 2) == 0
448  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
449 .endif
450 .if (WORDS_PER_LOOP % 2) == 0
451  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
452 .endif
453 .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4
454 .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels
455
456function ff_mlp_pack_output_outoforder_\channels\()ch_\shift\()shift_armv6, export=1
457 .if SAMPLES_PER_LOOP > 1
 // mask test relies on SAMPLES_PER_LOOP being a power of two (2, 2, 1 for
 // 2, 6, 8 channels); the branch is a tail call with the original arguments
458        tst     COUNT, #SAMPLES_PER_LOOP - 1  // always seems to be in practice
459        it      ne
460        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
461 .endif
 // zero samples: return straight away, CHECK unchanged
462        teq     COUNT, #0
463        it      eq
464        bxeq    lr
 // 9 registers are pushed, so the first stack argument is now at sp + 9*4
465        push    {v1-v6,sl,fp,lr}
466        ldr     CHAN0, [sp, #(9+0)*4]  // get ch_assign from stack
 // ch_assign is read as packed bytes, one index per output channel
467        ldr     CHAN4, [CHAN0]
468 .if \channels == 2
469        uxtb    CHAN0, CHAN4, ror #0
470        uxtb    CHAN1, CHAN4, ror #8
471 .else
472        ldr     CHAN5, [CHAN0, #4]
473  .if \channels == 6
  // six indices fit in CHAN0-5, so unpack them once; CHAN4 and CHAN5 are
  // overwritten last because they are also the packed sources.
  // For 8 channels the words stay packed and output4words unpacks per step.
474        uxtb    CHAN0, CHAN4, ror #0
475        uxtb    CHAN1, CHAN4, ror #8
476        uxtb    CHAN2, CHAN4, ror #16
477        uxtb    CHAN3, CHAN4, ror #24
478        uxtb    CHAN4, CHAN5, ror #0
479        uxtb    CHAN5, CHAN5, ror #8
480  .endif
481 .endif
482 .set IDX1, \channels
483 .set IDX2, \channels
4840:
485 .rept WORDS_PER_LOOP / 4
486        output4words
487 .endr
488        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
489        bne     0b
490        pop     {v1-v6,sl,fp,pc}
491        .ltorg
492endfunc
493 .purgem output4words
494
495        .unreq  CHECK
496        .unreq  COUNT
497        .unreq  IN
498        .unreq  OUT
499        .unreq  DAT0
500        .unreq  DAT1
501        .unreq  DAT2
502        .unreq  DAT3
503        .unreq  CHAN0
504        .unreq  CHAN1
505        .unreq  CHAN2
506        .unreq  CHAN3
507        .unreq  CHAN4
508        .unreq  CHAN5
509
510#endif // !CONFIG_THUMB
511
512.endif // mixed
513.endif // inorder
514.endm // implement_pack
515
// pack_channels inorder, channels
// Expands implement_pack for the given channel ordering and channel count
// once for each constant shift 0..5 and once for the mixed-shift case, in
// that order.
516.macro pack_channels  inorder, channels
517        implement_pack  \inorder, \channels, 0
518        implement_pack  \inorder, \channels, 1
519        implement_pack  \inorder, \channels, 2
520        implement_pack  \inorder, \channels, 3
521        implement_pack  \inorder, \channels, 4
522        implement_pack  \inorder, \channels, 5
523        implement_pack  \inorder, \channels, mixed
524.endm
525
// pack_order inorder
// Expands pack_channels for each supported channel count (2, 6 and 8) with
// the given channel ordering flag.
526.macro pack_order  inorder
527        pack_channels  \inorder, 2
528        pack_channels  \inorder, 6
529        pack_channels  \inorder, 8
530.endm
531
// Instantiate every variant: out-of-order (0) first, then in-order (1).
532        pack_order  0
533        pack_order  1
534