1@
2@ ARMv4-optimized halfpel functions
3@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4@
5@ This file is part of FFmpeg.
6@
7@ FFmpeg is free software; you can redistribute it and/or
8@ modify it under the terms of the GNU Lesser General Public
9@ License as published by the Free Software Foundation; either
10@ version 2.1 of the License, or (at your option) any later version.
11@
12@ FFmpeg is distributed in the hope that it will be useful,
13@ but WITHOUT ANY WARRANTY; without even the implied warranty of
14@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15@ Lesser General Public License for more details.
16@
17@ You should have received a copy of the GNU Lesser General Public
18@ License along with FFmpeg; if not, write to the Free Software
19@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20@
21
22#include "config.h"
23#include "libavutil/arm/asm.S"
24
@ When HAVE_ARMV5TE_EXTERNAL is zero, redefine the pld mnemonic as the
@ assembler comment character, so every prefetch hint in this file turns into
@ a comment and assembles to nothing.
@ NOTE(review): presumably pld is not available on such targets - confirm.
25#if !HAVE_ARMV5TE_EXTERNAL
26#define pld @
27#endif
28
@ ALIGN_QWORD_D shift, Rd0..Rd3, Rn0..Rn4
@ Build four destination words from five consecutive source words, shifted
@ right by shift bytes:
@     Rd[i] = (Rn[i] >> 8*shift) | (Rn[i+1] << (32 - 8*shift)),  i = 0..3
@ Assuming little-endian loads, this is the 16 bytes that start shift bytes
@ past the address Rn0 was loaded from.
@ The macro only reads the Rn registers, so they survive as long as no
@ destination aliases them. Used in this file with shift = 1, 2 and 3.
29.macro  ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        @ Low part of each result: the source word with its low-order shift
        @ bytes dropped.
30        mov             \Rd0, \Rn0, lsr #(\shift * 8)
31        mov             \Rd1, \Rn1, lsr #(\shift * 8)
32        mov             \Rd2, \Rn2, lsr #(\shift * 8)
33        mov             \Rd3, \Rn3, lsr #(\shift * 8)
        @ High part of each result: the low-order shift bytes of the next word.
34        orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
35        orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
36        orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
37        orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
38.endm
@ ALIGN_DWORD shift, R0, R1, R2
@ In-place variant for two words: shift the three-word sequence R0,R1,R2
@ right by shift bytes and leave the result in R0,R1:
@     R0 = (R0 >> 8*shift) | (R1 << (32 - 8*shift))
@     R1 = (R1 >> 8*shift) | (R2 << (32 - 8*shift))
@ R2 is only read. The instruction order matters: R1 must still hold its
@ original value when it is merged into R0.
39.macro  ALIGN_DWORD shift, R0, R1, R2
40        mov             \R0, \R0, lsr #(\shift * 8)
41        orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)
42        mov             \R1, \R1, lsr #(\shift * 8)
43        orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
44.endm
@ ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
@ Two-word variant with separate destinations:
@     Rdst0 = (Rsrc0 >> 8*shift) | (Rsrc1 << (32 - 8*shift))
@     Rdst1 = (Rsrc1 >> 8*shift) | (Rsrc2 << (32 - 8*shift))
@ Rsrc0 is read only by the first instruction, so Rdst1 may be the same
@ register as Rsrc0 (RND_XY2_IT relies on this for align 3). Rsrc1 and Rsrc2
@ are read after both destinations have been written and therefore must not
@ alias either destination.
45.macro  ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
46        mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
47        mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
48        orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
49        orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
50.endm
51
52.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Average two register pairs byte by byte, rounding up:
53        @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
        @    = (Rn + Rm + 1) >> 1 in every byte lane
54        @ Rmask = 0xFEFEFEFE
        @ The mask clears the low bit of each byte before the shift, so no bit
        @ can move into the neighbouring byte lane.
55        @ Rn0/Rn1 are destroyed (left holding Rn | Rm); Rm and Rmask are only read
56        eor             \Rd0, \Rn0, \Rm0
57        eor             \Rd1, \Rn1, \Rm1
58        orr             \Rn0, \Rn0, \Rm0
59        orr             \Rn1, \Rn1, \Rm1
60        and             \Rd0, \Rd0, \Rmask
61        and             \Rd1, \Rd1, \Rmask
62        sub             \Rd0, \Rn0, \Rd0, lsr #1
63        sub             \Rd1, \Rn1, \Rd1, lsr #1
64.endm
65
66.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Average two register pairs byte by byte, rounding down:
67        @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
        @    = (Rn + Rm) >> 1 in every byte lane
68        @ Rmask = 0xFEFEFEFE
        @ The mask clears the low bit of each byte before the shift, so no bit
        @ can move into the neighbouring byte lane.
69        @ Rn0/Rn1 are destroyed (left holding Rn & Rm); Rm and Rmask are only read
70        eor             \Rd0, \Rn0, \Rm0
71        eor             \Rd1, \Rn1, \Rm1
72        and             \Rn0, \Rn0, \Rm0
73        and             \Rn1, \Rn1, \Rm1
74        and             \Rd0, \Rd0, \Rmask
75        and             \Rd1, \Rd1, \Rmask
76        add             \Rd0, \Rn0, \Rd0, lsr #1
77        add             \Rd1, \Rn1, \Rd1, lsr #1
78.endm
79
@ JMP_ALIGN tmp, reg
@ Dispatch on the byte offset of the pointer in reg within its word and round
@ reg down to a word boundary. Control continues at the next local label
@ 1, 2, 3 or 4 for an offset of 0, 1, 2 or 3 bytes; the code that follows the
@ macro must define all four labels. Clobbers tmp and the condition flags.
80.macro  JMP_ALIGN tmp, reg
        @ tmp = reg & 3; Z is set when the pointer is already aligned.
81        ands            \tmp, \reg, #3
        @ bic has no s suffix, so the flags from ands survive for the beq.
82        bic             \reg, \reg, #3
83        beq             1f
84        subs            \tmp, \tmp, #1
85        beq             2f
86        subs            \tmp, \tmp, #1
87        beq             3f
88        b    4f
89.endm
90
91@ ----------------------------------------------------------------
92function ff_put_pixels16_arm, export=1, align=5
93        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
94        @ block = word aligned, pixels = unaligned
        @
        @ Copy h rows of 16 bytes from pixels to block; both pointers advance by
        @ line_size after every row.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Each loop tests the row counter only after a row has been stored, so
        @ h is expected to be nonzero. r4-r11 and lr are saved on entry and the
        @ function returns by popping the saved lr into pc; r12 is scratch.
95        pld             [r1]
96        push            {r4-r11, lr}
        @ r5 = pixels & 3 and r1 is rounded down to a word boundary; continue at
        @ the copy loop written for that byte offset (labels 1-4).
97        JMP_ALIGN       r5,  r1
@ Offset 0: the source is word aligned, move four words straight across.
981:
99        ldm             r1,  {r4-r7}
100        add             r1,  r1,  r2
101        stm             r0,  {r4-r7}
102        pld             [r1]
103        subs            r3,  r3,  #1
104        add             r0,  r0,  r2
105        bne             1b
106        pop             {r4-r11, pc}
        @ Offsets 1-3: load five aligned words (20 bytes, so bytes on both sides
        @ of the wanted 16 are read) and shift them into place in r9-r12.
        @ Every loop entry is aligned to a 32-byte boundary.
107        .align 5
1082:
109        ldm             r1,  {r4-r8}
110        add             r1,  r1,  r2
111        ALIGN_QWORD_D   1,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
112        pld             [r1]
113        subs            r3,  r3,  #1
114        stm             r0,  {r9-r12}
115        add             r0,  r0,  r2
116        bne             2b
117        pop             {r4-r11, pc}
118        .align 5
@ Offset 2.
1193:
120        ldm             r1,  {r4-r8}
121        add             r1,  r1,  r2
122        ALIGN_QWORD_D   2,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
123        pld             [r1]
124        subs            r3,  r3,  #1
125        stm             r0,  {r9-r12}
126        add             r0,  r0,  r2
127        bne             3b
128        pop             {r4-r11, pc}
129        .align 5
@ Offset 3.
1304:
131        ldm             r1,  {r4-r8}
132        add             r1,  r1,  r2
133        ALIGN_QWORD_D   3,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
134        pld             [r1]
135        subs            r3,  r3,  #1
136        stm             r0,  {r9-r12}
137        add             r0,  r0,  r2
138        bne             4b
139        pop             {r4-r11,pc}
140endfunc
141
142@ ----------------------------------------------------------------
143function ff_put_pixels8_arm, export=1, align=5
144        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
145        @ block = word aligned, pixels = unaligned
        @
        @ Copy h rows of 8 bytes from pixels to block; both pointers advance by
        @ line_size after every row.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Each loop tests the row counter only after a row has been stored, so
        @ h is expected to be nonzero. Only r4, r5 and lr are saved; r12 holds
        @ the third source word in the unaligned loops.
146        pld             [r1]
147        push            {r4-r5,lr}
        @ r5 = pixels & 3 and r1 is rounded down to a word boundary; continue at
        @ the copy loop written for that byte offset (labels 1-4).
148        JMP_ALIGN       r5,  r1
@ Offset 0: the source is word aligned, move two words straight across.
1491:
150        ldm             r1,  {r4-r5}
151        add             r1,  r1,  r2
152        subs            r3,  r3,  #1
153        pld             [r1]
154        stm             r0,  {r4-r5}
155        add             r0,  r0,  r2
156        bne             1b
157        pop             {r4-r5,pc}
        @ Offsets 1-3: load three aligned words (12 bytes) and shift r4,r5 into
        @ place, taking the missing bytes from r12.
158        .align 5
1592:
160        ldm             r1,  {r4-r5, r12}
161        add             r1,  r1,  r2
162        ALIGN_DWORD     1,   r4,  r5,  r12
163        pld             [r1]
164        subs            r3,  r3,  #1
165        stm             r0,  {r4-r5}
166        add             r0,  r0,  r2
167        bne             2b
168        pop             {r4-r5,pc}
169        .align 5
@ Offset 2.
1703:
171        ldm             r1,  {r4-r5, r12}
172        add             r1,  r1,  r2
173        ALIGN_DWORD     2,   r4,  r5,  r12
174        pld             [r1]
175        subs            r3,  r3,  #1
176        stm             r0,  {r4-r5}
177        add             r0,  r0,  r2
178        bne             3b
179        pop             {r4-r5,pc}
180        .align 5
@ Offset 3.
1814:
182        ldm             r1,  {r4-r5, r12}
183        add             r1,  r1,  r2
184        ALIGN_DWORD     3,   r4,  r5,  r12
185        pld             [r1]
186        subs            r3,  r3,  #1
187        stm             r0,  {r4-r5}
188        add             r0,  r0,  r2
189        bne             4b
190        pop             {r4-r5,pc}
191endfunc
192
193@ ----------------------------------------------------------------
194function ff_put_pixels8_x2_arm, export=1, align=5
195        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
196        @ block = word aligned, pixels = unaligned
        @
        @ Horizontal half-pel interpolation with rounding: for each of h rows,
        @ output byte x (x = 0..7) is (pixels[x] + pixels[x+1] + 1) >> 1.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ r12 holds the 0xfefefefe lane mask needed by RND_AVG32.
        @ Each loop tests the row counter only after a row has been stored, so
        @ h is expected to be nonzero. Every row loads three aligned words.
        @ Byte numbers below are relative to the word-aligned address and assume
        @ little-endian loads.
197        pld             [r1]
198        push            {r4-r10,lr}
199        ldr             r12, =0xfefefefe
        @ r5 = pixels & 3 and r1 is rounded down to a word boundary; continue at
        @ the loop written for that byte offset (labels 1-4).
200        JMP_ALIGN       r5,  r1
@ Offset 0: r4,r5 = bytes 0-7 as loaded, r6,r7 = bytes 1-8.
2011:
202        ldm             r1,  {r4-r5, r10}
203        add             r1,  r1,  r2
204        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
205        pld             [r1]
206        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
207        subs            r3,  r3,  #1
208        stm             r0,  {r8-r9}
209        add             r0,  r0,  r2
210        bne             1b
211        pop             {r4-r10,pc}
212        .align 5
@ Offset 1: r6,r7 = bytes 1-8, r8,r9 = bytes 2-9; the average reuses r4,r5.
2132:
214        ldm             r1,  {r4-r5, r10}
215        add             r1,  r1,  r2
216        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
217        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
218        pld             [r1]
219        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
220        subs            r3,  r3,  #1
221        stm             r0,  {r4-r5}
222        add             r0,  r0,  r2
223        bne             2b
224        pop             {r4-r10,pc}
225        .align 5
@ Offset 2: r6,r7 = bytes 2-9, r8,r9 = bytes 3-10.
2263:
227        ldm             r1,  {r4-r5, r10}
228        add             r1,  r1,  r2
229        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
230        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
231        pld             [r1]
232        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
233        subs            r3,  r3,  #1
234        stm             r0,  {r4-r5}
235        add             r0,  r0,  r2
236        bne             3b
237        pop             {r4-r10,pc}
238        .align 5
@ Offset 3: r6,r7 = bytes 3-10; bytes 4-11 are already word aligned in
@ r5,r10, so no second shift is needed.
2394:
240        ldm             r1,  {r4-r5, r10}
241        add             r1,  r1,  r2
242        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
243        pld             [r1]
244        RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12
245        subs            r3,  r3,  #1
246        stm             r0,  {r8-r9}
247        add             r0,  r0,  r2
248        bne             4b
249        pop             {r4-r10,pc}
250endfunc
251
252function ff_put_no_rnd_pixels8_x2_arm, export=1, align=5
253        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
254        @ block = word aligned, pixels = unaligned
        @
        @ Horizontal half-pel interpolation without rounding: for each of h
        @ rows, output byte x (x = 0..7) is (pixels[x] + pixels[x+1]) >> 1.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ r12 holds the 0xfefefefe lane mask needed by NO_RND_AVG32.
        @ Each loop tests the row counter only after a row has been stored, so
        @ h is expected to be nonzero. Every row loads three aligned words.
        @ Byte numbers below are relative to the word-aligned address and assume
        @ little-endian loads.
255        pld             [r1]
256        push            {r4-r10,lr}
257        ldr             r12, =0xfefefefe
        @ r5 = pixels & 3 and r1 is rounded down to a word boundary; continue at
        @ the loop written for that byte offset (labels 1-4).
258        JMP_ALIGN       r5,  r1
@ Offset 0: r4,r5 = bytes 0-7 as loaded, r6,r7 = bytes 1-8.
2591:
260        ldm             r1,  {r4-r5, r10}
261        add             r1,  r1,  r2
262        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
263        pld             [r1]
264        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
265        subs            r3,  r3,  #1
266        stm             r0,  {r8-r9}
267        add             r0,  r0,  r2
268        bne             1b
269        pop             {r4-r10,pc}
270        .align 5
@ Offset 1: r6,r7 = bytes 1-8, r8,r9 = bytes 2-9; the average reuses r4,r5.
2712:
272        ldm             r1,  {r4-r5, r10}
273        add             r1,  r1,  r2
274        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
275        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
276        pld             [r1]
277        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
278        subs            r3,  r3,  #1
279        stm             r0,  {r4-r5}
280        add             r0,  r0,  r2
281        bne             2b
282        pop             {r4-r10,pc}
283        .align 5
@ Offset 2: r6,r7 = bytes 2-9, r8,r9 = bytes 3-10.
2843:
285        ldm             r1,  {r4-r5, r10}
286        add             r1,  r1,  r2
287        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
288        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
289        pld             [r1]
290        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
291        subs            r3,  r3,  #1
292        stm             r0,  {r4-r5}
293        add             r0,  r0,  r2
294        bne             3b
295        pop             {r4-r10,pc}
296        .align 5
@ Offset 3: r6,r7 = bytes 3-10; bytes 4-11 are already word aligned in
@ r5,r10, so no second shift is needed.
2974:
298        ldm             r1,  {r4-r5, r10}
299        add             r1,  r1,  r2
300        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
301        pld             [r1]
302        NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12
303        subs            r3,  r3,  #1
304        stm             r0,  {r8-r9}
305        add             r0,  r0,  r2
306        bne             4b
307        pop             {r4-r10,pc}
308endfunc
309
310
311@ ----------------------------------------------------------------
312function ff_put_pixels8_y2_arm, export=1, align=5
313        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
314        @ block = word aligned, pixels = unaligned
        @
        @ Vertical half-pel interpolation with rounding: output row y is the
        @ per-byte (a + b + 1) >> 1 of source rows y and y+1, 8 bytes wide.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ The loops are unrolled twice and the counter is h >> 1, so two output
        @ rows are stored per iteration: an odd h loses its last row, and the
        @ counter is tested only after the stores, so h >= 2 is expected.
        @ Each source row is loaded once and reused as the upper row of the next
        @ average; h + 1 source rows are read in total.
315        pld             [r1]
316        push            {r4-r11,lr}
317        mov             r3,  r3,  lsr #1
318        ldr             r12, =0xfefefefe
        @ r5 = pixels & 3 and r1 is rounded down to a word boundary; continue at
        @ the loop written for that byte offset (labels 1-4).
319        JMP_ALIGN       r5,  r1
@ Offset 0: rows alternate between r4,r5 and r6,r7. RND_AVG32 destroys its
@ Rn pair, which is always the older row that is about to be reloaded.
3201:
321        ldm             r1,  {r4-r5}
322        add             r1,  r1,  r2
3236:      ldm             r1,  {r6-r7}
324        add             r1,  r1,  r2
325        pld             [r1]
326        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
327        ldm             r1,  {r4-r5}
328        add             r1,  r1,  r2
329        stm             r0,  {r8-r9}
330        add             r0,  r0,  r2
331        pld             [r1]
332        RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12
333        subs            r3,  r3,  #1
334        stm             r0,  {r8-r9}
335        add             r0,  r0,  r2
336        bne             6b
337        pop             {r4-r11,pc}
        @ Offsets 1-3: each row is loaded as three aligned words and shifted in
        @ place; rows alternate between r4,r5 (spare word r6) and r7,r8 (spare
        @ word r9), and the averages go to r10,r11.
338        .align 5
3392:
340        ldm             r1,  {r4-r6}
341        add             r1,  r1,  r2
342        pld             [r1]
343        ALIGN_DWORD     1,   r4,  r5,  r6
3446:      ldm             r1,  {r7-r9}
345        add             r1,  r1,  r2
346        pld             [r1]
347        ALIGN_DWORD     1,   r7,  r8,  r9
348        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
349        stm             r0,  {r10-r11}
350        add             r0,  r0,  r2
351        ldm             r1,  {r4-r6}
352        add             r1,  r1,  r2
353        pld             [r1]
354        ALIGN_DWORD     1,   r4,  r5,  r6
        @ The macro below sets no flags, so bne still sees this subs.
355        subs            r3,  r3,  #1
356        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
357        stm             r0,  {r10-r11}
358        add             r0,  r0,  r2
359        bne             6b
360        pop             {r4-r11,pc}
361        .align 5
@ Offset 2.
3623:
363        ldm             r1,  {r4-r6}
364        add             r1,  r1,  r2
365        pld             [r1]
366        ALIGN_DWORD     2,   r4,  r5,  r6
3676:      ldm             r1,  {r7-r9}
368        add             r1,  r1,  r2
369        pld             [r1]
370        ALIGN_DWORD     2,   r7,  r8,  r9
371        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
372        stm             r0,  {r10-r11}
373        add             r0,  r0,  r2
374        ldm             r1,  {r4-r6}
375        add             r1,  r1,  r2
376        pld             [r1]
377        ALIGN_DWORD     2,   r4,  r5,  r6
378        subs            r3,  r3,  #1
379        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
380        stm             r0,  {r10-r11}
381        add             r0,  r0,  r2
382        bne             6b
383        pop             {r4-r11,pc}
384        .align 5
@ Offset 3.
3854:
386        ldm             r1,  {r4-r6}
387        add             r1,  r1,  r2
388        pld             [r1]
389        ALIGN_DWORD     3,   r4,  r5,  r6
3906:      ldm             r1,  {r7-r9}
391        add             r1,  r1,  r2
392        pld             [r1]
393        ALIGN_DWORD     3,   r7,  r8,  r9
394        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
395        stm             r0,  {r10-r11}
396        add             r0,  r0,  r2
397        ldm             r1,  {r4-r6}
398        add             r1,  r1,  r2
399        pld             [r1]
400        ALIGN_DWORD     3,   r4,  r5,  r6
401        subs            r3,  r3,  #1
402        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
403        stm             r0,  {r10-r11}
404        add             r0,  r0,  r2
405        bne             6b
406        pop             {r4-r11,pc}
407endfunc
408
409function ff_put_no_rnd_pixels8_y2_arm, export=1, align=5
410        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
411        @ block = word aligned, pixels = unaligned
        @
        @ Vertical half-pel interpolation without rounding: output row y is the
        @ per-byte (a + b) >> 1 of source rows y and y+1, 8 bytes wide.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ The loops are unrolled twice and the counter is h >> 1, so two output
        @ rows are stored per iteration: an odd h loses its last row, and the
        @ counter is tested only after the stores, so h >= 2 is expected.
        @ Each source row is loaded once and reused as the upper row of the next
        @ average; h + 1 source rows are read in total.
412        pld             [r1]
413        push            {r4-r11,lr}
414        mov             r3,  r3,  lsr #1
415        ldr             r12, =0xfefefefe
        @ r5 = pixels & 3 and r1 is rounded down to a word boundary; continue at
        @ the loop written for that byte offset (labels 1-4).
416        JMP_ALIGN       r5,  r1
@ Offset 0: rows alternate between r4,r5 and r6,r7. NO_RND_AVG32 destroys its
@ Rn pair, which is always the older row that is about to be reloaded.
4171:
418        ldm             r1,  {r4-r5}
419        add             r1,  r1,  r2
4206:      ldm             r1,  {r6-r7}
421        add             r1,  r1,  r2
422        pld             [r1]
423        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
424        ldm             r1,  {r4-r5}
425        add             r1,  r1,  r2
426        stm             r0,  {r8-r9}
427        add             r0,  r0,  r2
428        pld             [r1]
429        NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
430        subs            r3,  r3,  #1
431        stm             r0,  {r8-r9}
432        add             r0,  r0,  r2
433        bne             6b
434        pop             {r4-r11,pc}
        @ Offsets 1-3: each row is loaded as three aligned words and shifted in
        @ place; rows alternate between r4,r5 (spare word r6) and r7,r8 (spare
        @ word r9), and the averages go to r10,r11.
435        .align 5
4362:
437        ldm             r1,  {r4-r6}
438        add             r1,  r1,  r2
439        pld             [r1]
440        ALIGN_DWORD     1,   r4,  r5,  r6
4416:      ldm             r1,  {r7-r9}
442        add             r1,  r1,  r2
443        pld             [r1]
444        ALIGN_DWORD     1,   r7,  r8,  r9
445        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
446        stm             r0,  {r10-r11}
447        add             r0,  r0,  r2
448        ldm             r1,  {r4-r6}
449        add             r1,  r1,  r2
450        pld             [r1]
451        ALIGN_DWORD     1,   r4,  r5,  r6
        @ The macro below sets no flags, so bne still sees this subs.
452        subs            r3,  r3,  #1
453        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
454        stm             r0,  {r10-r11}
455        add             r0,  r0,  r2
456        bne             6b
457        pop             {r4-r11,pc}
458        .align 5
@ Offset 2.
4593:
460        ldm             r1,  {r4-r6}
461        add             r1,  r1,  r2
462        pld             [r1]
463        ALIGN_DWORD     2,   r4,  r5,  r6
4646:      ldm             r1,  {r7-r9}
465        add             r1,  r1,  r2
466        pld             [r1]
467        ALIGN_DWORD     2,   r7,  r8,  r9
468        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
469        stm             r0,  {r10-r11}
470        add             r0,  r0,  r2
471        ldm             r1,  {r4-r6}
472        add             r1,  r1,  r2
473        pld             [r1]
474        ALIGN_DWORD     2,   r4,  r5,  r6
475        subs            r3,  r3,  #1
476        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
477        stm             r0,  {r10-r11}
478        add             r0,  r0,  r2
479        bne             6b
480        pop             {r4-r11,pc}
481        .align 5
@ Offset 3.
4824:
483        ldm             r1,  {r4-r6}
484        add             r1,  r1,  r2
485        pld             [r1]
486        ALIGN_DWORD     3,   r4,  r5,  r6
4876:      ldm             r1,  {r7-r9}
488        add             r1,  r1,  r2
489        pld             [r1]
490        ALIGN_DWORD     3,   r7,  r8,  r9
491        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
492        stm             r0,  {r10-r11}
493        add             r0,  r0,  r2
494        ldm             r1,  {r4-r6}
495        add             r1,  r1,  r2
496        pld             [r1]
497        ALIGN_DWORD     3,   r4,  r5,  r6
498        subs            r3,  r3,  #1
499        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
500        stm             r0,  {r10-r11}
501        add             r0,  r0,  r2
502        bne             6b
503        pop             {r4-r11,pc}
504endfunc
505
@ Emit the pending literal pool here: the constants referenced by the
@ ldr rX, =value loads in the functions above are placed at this point.
506        .ltorg
507
508@ ----------------------------------------------------------------
@ RND_XY2_IT align, rnd
@ Process one source row for the xy2 (diagonal half-pel) functions.
@   align = byte offset (0-3) of the pixels from the word-aligned pointer in r1
@   rnd   = shift mnemonic, lsl or lsr, that selects the rounding constant
@ In:   r1 = word-aligned source pointer, r2 = line size, r3 = row counter
@ Out:  r8,r9   = l1, per-byte sum of the low 2 bits of both pixel groups,
@                 plus the rounding constant when r3 was even on entry
@       r10,r11 = h1, per-byte sum of the upper 6 bits of both pixel groups
@       r1 advanced by r2, r3 decremented, flags left by the final subs
@ Clobbers r4-r7, r12 and r14 (lr).
509.macro  RND_XY2_IT align, rnd
510        @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
511        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
        @ a and b are the 8 pixels at x and the 8 pixels at x+1 of this row.
        @ Load three aligned words into the registers the alignment step
        @ below expects.
512.if \align == 0
513        ldm             r1,  {r6-r8}
514.elseif \align == 3
515        ldm             r1,  {r5-r7}
516.else
517        ldm             r1,  {r8-r10}
518.endif
519        add             r1,  r1,  r2
520        pld             [r1]
        @ Leave the two pixel groups in r4,r5 and r6,r7. For align 0 and 3 one
        @ group is already word aligned and stays where it was loaded. Which
        @ pair holds x and which holds x+1 does not matter, as the two are
        @ only ever added together.
521.if \align == 0
522        ALIGN_DWORD_D   1,   r4,  r5,  r6,  r7,  r8
523.elseif \align == 1
524        ALIGN_DWORD_D   1,   r4,  r5,  r8,  r9,  r10
525        ALIGN_DWORD_D   2,   r6,  r7,  r8,  r9,  r10
526.elseif \align == 2
527        ALIGN_DWORD_D   2,   r4,  r5,  r8,  r9,  r10
528        ALIGN_DWORD_D   3,   r6,  r7,  r8,  r9,  r10
529.elseif \align == 3
530        ALIGN_DWORD_D   3,   r4,  r5,  r5,  r6,  r7
531.endif
532        ldr             r14, =0x03030303
        @ Z (eq) is set when the row counter is even. Nothing up to the addeq
        @ pair below writes the flags, so the condition stays valid.
533        tst             r3,  #1
534        and             r8,  r4,  r14
535        and             r9,  r5,  r14
536        and             r10, r6,  r14
537        and             r11, r7,  r14
        @ On even rows turn r14 into the rounding constant:
        @   rnd = lsl: 0x03030303 & 0x06060606 = 0x02020202
        @   rnd = lsr: 0x03030303 & 0x01818181 = 0x01010101
        @ The it/itt lines make the conditional instructions valid when this
        @ file is assembled as Thumb-2.
538        it              eq
539        andeq           r14, r14, r14, \rnd #1
540        add             r8,  r8,  r10
541        add             r9,  r9,  r11
        @ r12 = 0x3f3f3f3f: after the shift right by 2 it keeps 6 bits per lane.
542        ldr             r12, =0xfcfcfcfc >> 2
543        itt             eq
544        addeq           r8,  r8,  r14
545        addeq           r9,  r9,  r14
546        and             r4,  r12, r4,  lsr #2
547        and             r5,  r12, r5,  lsr #2
548        and             r6,  r12, r6,  lsr #2
549        and             r7,  r12, r7,  lsr #2
550        add             r10, r4,  r6
551        add             r11, r5,  r7
        @ Count the row; RND_XY2_EXPAND branches on the flags set here.
552        subs            r3,  r3,  #1
553.endm
554
@ RND_XY2_EXPAND align, rnd
@ Row loop and function epilogue of the xy2 functions for one source
@ alignment. Each output byte is the sum of four source pixels (x and x+1 on
@ two consecutive rows) plus the rounding constant, shifted right by 2.
@ The sum is split into the l1 and h1 partial sums produced by RND_XY2_IT so
@ that the byte lanes cannot overflow.
@ Expects r0 = destination, r1 = word-aligned source, r2 = line size,
@ r3 = row count, and r4-r11, lr pushed by the caller of the macro. With
@ r3 = h on entry the loop stores h rows and reads h + 1 source rows.
@ Because the row counter drops by one per source row, exactly one of any two
@ consecutive rows adds the rounding constant.
555.macro RND_XY2_EXPAND align, rnd
        @ Partial sums of the first source row.
556        RND_XY2_IT      \align, \rnd
        @ Keep the previous row sums on the stack while the next row is
        @ processed, because RND_XY2_IT overwrites r4-r11.
5576:      push            {r8-r11}
558        RND_XY2_IT      \align, \rnd
        @ r4,r5 / r6,r7 = l1 / h1 of the previous row,
        @ r8,r9 / r10,r11 = l1 / h1 of the current row.
559        pop             {r4-r7}
560        add             r4,  r4,  r8
561        add             r5,  r5,  r9
562        ldr             r14, =0x0f0f0f0f
563        add             r6,  r6,  r10
564        add             r7,  r7,  r11
        @ (low sum >> 2) per lane; the mask removes the bits shifted in from
        @ the neighbouring byte.
565        and             r4,  r14, r4,  lsr #2
566        and             r5,  r14, r5,  lsr #2
567        add             r4,  r4,  r6
568        add             r5,  r5,  r7
569        stm             r0,  {r4-r5}
570        add             r0,  r0,  r2
        @ The flags still come from the subs on r3 at the end of RND_XY2_IT;
        @ nothing after it sets them. Loop while the counter is >= 0.
571        bge             6b
        @ Restore the registers pushed by the function prologue and return.
572        pop             {r4-r11,pc}
573.endm
574
575function ff_put_pixels8_xy2_arm, export=1, align=5
576        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
577        @ block = word aligned, pixels = unaligned
        @
        @ Diagonal half-pel interpolation with rounding: each of the 8 output
        @ bytes in a row is (a + b + c + d + 2) >> 2, where a,b are the source
        @ pixels at x and x+1 on row y and c,d the same pixels on row y+1.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ The lsl argument makes RND_XY2_IT use 0x02020202 as rounding constant.
578        pld             [r1]
579        push            {r4-r11,lr} @ R14 is also called LR
        @ r5 = pixels & 3 and r1 is rounded down to a word boundary; continue at
        @ the expansion for that byte offset. Every expansion ends with its own
        @ pop of pc, so control never falls through to the next label.
580        JMP_ALIGN       r5,  r1
5811:      RND_XY2_EXPAND  0, lsl
582        .align 5
5832:      RND_XY2_EXPAND  1, lsl
584        .align 5
5853:      RND_XY2_EXPAND  2, lsl
586        .align 5
5874:      RND_XY2_EXPAND  3, lsl
588endfunc
589
590function ff_put_no_rnd_pixels8_xy2_arm, export=1, align=5
591        @ void func(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
592        @ block = word aligned, pixels = unaligned
        @
        @ Diagonal half-pel interpolation, no-rounding variant: each of the 8
        @ output bytes in a row is (a + b + c + d + 1) >> 2, where a,b are the
        @ source pixels at x and x+1 on row y and c,d the same pixels on row y+1.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ The lsr argument makes RND_XY2_IT use 0x01010101 as rounding constant.
593        pld             [r1]
        @ lr (r14) is saved because the macros below use r14 as scratch.
594        push            {r4-r11,lr}
        @ r5 = pixels & 3 and r1 is rounded down to a word boundary; continue at
        @ the expansion for that byte offset. Every expansion ends with its own
        @ pop of pc, so control never falls through to the next label.
595        JMP_ALIGN       r5,  r1
5961:      RND_XY2_EXPAND  0, lsr
597        .align 5
5982:      RND_XY2_EXPAND  1, lsr
599        .align 5
6003:      RND_XY2_EXPAND  2, lsr
601        .align 5
6024:      RND_XY2_EXPAND  3, lsr
603endfunc
604