1/*
2 * Bluetooth low-complexity, subband codec (SBC)
3 *
4 * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
5 * Copyright (C) 2008-2010  Nokia Corporation
6 * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
7 * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
8 * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
9 *
10 * This file is part of FFmpeg.
11 *
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 * Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 */
26
27/**
28 * @file
29 * SBC ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline.
30 */
31
32#include "libavutil/arm/asm.S"
33
/*
 * ff_sbc_analyze_4_armv6: one 4-output SBC analysis step.
 *
 * Arguments: r0 = in, r1 = out, r2 = consts.
 * in and consts are read as packed signed 16-bit values (two per 32-bit
 * word, as consumed by smlad/smuad); out receives four 32-bit words.
 *
 * Stage 1 (consts bytes 0..79): four sums, for i = 0..3, with in[] and
 * consts[] indexed in 16-bit units:
 *   t1[i] = 0x8000 + sum over k = 0..4 of
 *           in[8k + 2i] * consts[8k + 2i] + in[8k + 2i + 1] * consts[8k + 2i + 1]
 * Only the upper 16 bits of each sum are kept; they are packed two per
 * register (r3 = t1[0] low / t1[1] high, r12 = t1[2] low / t1[3] high).
 *
 * Stage 2 (consts bytes 80..111, called the cos table in the comments
 * below), with W[n] the 32-bit word at consts + 80 + 4n and t[i] the kept
 * 16 bits of t1[i]:
 *   out[j] = t[0] * lo(W[j])     + t[1] * hi(W[j])
 *          + t[2] * lo(W[4 + j]) + t[3] * hi(W[4 + j])      for j = 0..3
 *
 * Every register written here (r1, r3-r12, r14) is saved on entry and
 * restored on exit; the function returns by popping the saved lr into pc.
 */
34function ff_sbc_analyze_4_armv6, export=1
35        @ r0 = in, r1 = out, r2 = consts
        @ Save all registers used as scratch below, plus the return address.
36        push            {r1, r3-r7, lr}
37        push            {r8-r12, r14}
        @ --- Stage 1, sums t1[0] (in r3) and t1[1] (in r12) ---
        @ Each ldrd pair fetches two words of in and the two matching words
        @ of consts; the loads for the next step are issued before the
        @ multiply-accumulates that consume the current ones.
38        ldrd            r4,  r5,  [r0, #0]
39        ldrd            r6,  r7,  [r2, #0]
40        ldrd            r8,  r9,  [r0, #16]
41        ldrd            r10, r11, [r2, #16]
42        mov             r14, #0x8000             @ initial accumulator value: half of the 1 << 16 discarded by asr #16 below
43        smlad           r3,  r4,  r6,  r14       @ r3  = 0x8000 + two 16x16 products
44        smlad           r12, r5,  r7,  r14       @ r12 = 0x8000 + two 16x16 products
45        ldrd            r4,  r5,  [r0, #32]
46        ldrd            r6,  r7,  [r2, #32]
47        smlad           r3,  r8,  r10, r3
48        smlad           r12, r9,  r11, r12
49        ldrd            r8,  r9,  [r0, #48]
50        ldrd            r10, r11, [r2, #48]
51        smlad           r3,  r4,  r6,  r3
52        smlad           r12, r5,  r7,  r12
53        ldrd            r4,  r5,  [r0, #64]
54        ldrd            r6,  r7,  [r2, #64]
55        smlad           r3,  r8,  r10, r3
56        smlad           r12, r9,  r11, r12
        @ First operands for t1[2]/t1[3] (byte offsets 8, 24, 40, 56, 72).
57        ldrd            r8,  r9,  [r0, #8]
58        ldrd            r10, r11, [r2, #8]
59        smlad           r3,  r4,  r6,  r3        @ t1[0] is done
60        smlad           r12, r5,  r7,  r12       @ t1[1] is done
61        ldrd            r4,  r5,  [r0, #24]
62        ldrd            r6,  r7,  [r2, #24]
        @ r3 = (r12 & 0xffff0000) | ((r3 >> 16) & 0xffff): upper halves of both sums.
63        pkhtb           r3,  r12, r3, asr #16    @ combine t1[0] and t1[1]
        @ --- Stage 1, sums t1[2] (in r12) and t1[3] (in r14) ---
64        smlad           r12, r8,  r10, r14
65        smlad           r14, r9,  r11, r14       @ last read of the 0x8000 constant; r14 now accumulates t1[3]
66        ldrd            r8,  r9,  [r0, #40]
67        ldrd            r10, r11, [r2, #40]
68        smlad           r12, r4,  r6,  r12
69        smlad           r14, r5,  r7,  r14
70        ldrd            r4,  r5,  [r0, #56]
71        ldrd            r6,  r7,  [r2, #56]
72        smlad           r12, r8,  r10, r12
73        smlad           r14, r9,  r11, r14
74        ldrd            r8,  r9,  [r0, #72]
75        ldrd            r10, r11, [r2, #72]
76        smlad           r12, r4,  r6,  r12
77        smlad           r14, r5,  r7,  r14
78        ldrd            r4,  r5,  [r2, #80]      @ start loading cos table
79        smlad           r12, r8,  r10, r12       @ t1[2] is done
80        smlad           r14, r9,  r11, r14       @ t1[3] is done
81        ldrd            r6,  r7,  [r2, #88]
82        ldrd            r8,  r9,  [r2, #96]
83        ldrd            r10, r11, [r2, #104]     @ cos table fully loaded
84        pkhtb           r12, r14, r12, asr #16   @ combine t1[2] and t1[3]
        @ --- Stage 2: r4..r7 hold W[0..3], r8..r11 hold W[4..7] ---
        @ smuad starts each output from the r3 pair (no rounding constant
        @ here); smlad then adds the contribution of the r12 pair.
85        smuad           r4,  r3,  r4             @ out[0], first half
86        smuad           r5,  r3,  r5             @ out[1], first half
87        smlad           r4,  r12, r8,  r4        @ out[0] complete
88        smlad           r5,  r12, r9,  r5        @ out[1] complete
89        smuad           r6,  r3,  r6             @ out[2], first half
90        smuad           r7,  r3,  r7             @ out[3], first half
91        smlad           r6,  r12, r10, r6        @ out[2] complete
92        smlad           r7,  r12, r11, r7        @ out[3] complete
93        pop             {r8-r12, r14}            @ undo the second push
94        stmia           r1, {r4, r5, r6, r7}     @ write out[0..3] (r1 is not modified)
95        pop             {r1, r3-r7, pc}          @ undo the first push; loading pc returns to the caller
96endfunc
97
/*
 * ff_sbc_analyze_8_armv6: one 8-output SBC analysis step.
 *
 * Arguments: r0 = in, r1 = out, r2 = consts.
 * in and consts are read as packed signed 16-bit values (two per 32-bit
 * word, as consumed by smlad/smuad); out receives eight 32-bit words.
 *
 * Stage 1 (consts bytes 0..159): eight sums, for i = 0..7, with in[] and
 * consts[] indexed in 16-bit units:
 *   t1[i] = 0x8000 + sum over k = 0..4 of
 *           in[16k + 2i] * consts[16k + 2i] + in[16k + 2i + 1] * consts[16k + 2i + 1]
 * Only the upper 16 bits of each sum are kept, packed two per register.
 * The pairs are produced in the order [6:7], [4:5], [0:1], [2:3]; the first
 * two pairs are spilled to the stack because all of r3-r12 and r14 are busy.
 *
 * Stage 2 (table starting at consts + 160), with W[n] the 32-bit word at
 * consts + 160 + 4n and t[i] the kept 16 bits of t1[i]:
 *   out[j] = sum over p = 0..3 of
 *            t[2p] * lo(W[8p + j]) + t[2p + 1] * hi(W[8p + j])   for j = 0..7
 * out[0..3] are computed and stored first, then out[4..7].
 *
 * r2 is advanced by 136 part-way through (ldrd with writeback), so the
 * stage 2 offsets are written as (160 - 136 + x).
 * NOTE(review): presumably this is done because the largest table offset,
 * 160 + 16 + 104 = 280, does not fit the ldrd immediate offset field in
 * ARM state - confirm against the architecture manual.
 *
 * Every register written here except r0 and r2 (that is r1, r3-r12, r14)
 * is saved on entry and restored on exit; r0 and r2 are overwritten.
 * The function returns by popping the saved lr into pc.
 */
98function ff_sbc_analyze_8_armv6, export=1
99        @ r0 = in, r1 = out, r2 = consts
        @ Save all registers restored at the end, plus the return address.
100        push            {r1, r3-r7, lr}
101        push            {r8-r12, r14}
        @ --- Stage 1, sums t1[6] (in r3) and t1[7] (in r12) ---
        @ Byte offsets 24, 56, 88, 120, 152 in both in and consts.
102        ldrd            r4,  r5,  [r0, #24]
103        ldrd            r6,  r7,  [r2, #24]
104        ldrd            r8,  r9,  [r0, #56]
105        ldrd            r10, r11, [r2, #56]
106        mov             r14, #0x8000             @ initial accumulator value: half of the 1 << 16 discarded by asr #16 below
107        smlad           r3,  r4,  r6,  r14
108        smlad           r12, r5,  r7,  r14
109        ldrd            r4,  r5,  [r0, #88]
110        ldrd            r6,  r7,  [r2, #88]
111        smlad           r3,  r8,  r10, r3
112        smlad           r12, r9,  r11, r12
113        ldrd            r8,  r9,  [r0, #120]
114        ldrd            r10, r11, [r2, #120]
115        smlad           r3,  r4,  r6,  r3
116        smlad           r12, r5,  r7,  r12
117        ldrd            r4,  r5,  [r0, #152]
118        ldrd            r6,  r7,  [r2, #152]
119        smlad           r3,  r8,  r10, r3
120        smlad           r12, r9,  r11, r12
        @ First operands for t1[4]/t1[5] (byte offsets 16, 48, 80, 112, 144).
121        ldrd            r8,  r9,  [r0, #16]
122        ldrd            r10, r11, [r2, #16]
123        smlad           r3,  r4,  r6,  r3        @ t1[6] is done
124        smlad           r12, r5,  r7,  r12       @ t1[7] is done
125        ldrd            r4,  r5,  [r0, #48]
126        ldrd            r6,  r7,  [r2, #48]
        @ r3 = (r12 & 0xffff0000) | ((r3 >> 16) & 0xffff): upper halves of both sums.
127        pkhtb           r3,  r12, r3, asr #16    @ combine t1[6] and t1[7]
128        str             r3,  [sp, #-4]!          @ save to stack
        @ --- Stage 1, sums t1[4] (in r3) and t1[5] (in r12) ---
129        smlad           r3,  r8,  r10, r14
130        smlad           r12, r9,  r11, r14
131        ldrd            r8,  r9,  [r0, #80]
132        ldrd            r10, r11, [r2, #80]
133        smlad           r3,  r4,  r6,  r3
134        smlad           r12, r5,  r7,  r12
135        ldrd            r4,  r5,  [r0, #112]
136        ldrd            r6,  r7,  [r2, #112]
137        smlad           r3,  r8,  r10, r3
138        smlad           r12, r9,  r11, r12
139        ldrd            r8,  r9,  [r0, #144]
140        ldrd            r10, r11, [r2, #144]
141        smlad           r3,  r4,  r6,  r3
142        smlad           r12, r5,  r7,  r12
        @ First operands for t1[0]/t1[1] (byte offsets 0, 32, 64, 96, 128).
143        ldrd            r4,  r5,  [r0, #0]
144        ldrd            r6,  r7,  [r2, #0]
145        smlad           r3,  r8,  r10, r3        @ t1[4] is done
146        smlad           r12, r9,  r11, r12       @ t1[5] is done
147        ldrd            r8,  r9,  [r0, #32]
148        ldrd            r10, r11, [r2, #32]
149        pkhtb           r3,  r12, r3, asr #16    @ combine t1[4] and t1[5]
150        str             r3,  [sp, #-4]!          @ save to stack
        @ --- Stage 1, sums t1[0] (in r3) and t1[1] (in r12) ---
151        smlad           r3,  r4,  r6,  r14
152        smlad           r12, r5,  r7,  r14
153        ldrd            r4,  r5,  [r0, #64]
154        ldrd            r6,  r7,  [r2, #64]
155        smlad           r3,  r8,  r10, r3
156        smlad           r12, r9,  r11, r12
157        ldrd            r8,  r9,  [r0, #96]
158        ldrd            r10, r11, [r2, #96]
159        smlad           r3,  r4,  r6,  r3
160        smlad           r12, r5,  r7,  r12
161        ldrd            r4,  r5,  [r0, #128]
162        ldrd            r6,  r7,  [r2, #128]
163        smlad           r3,  r8,  r10, r3
164        smlad           r12, r9,  r11, r12
        @ First operands for t1[2]/t1[3] (byte offsets 8, 40, 72, 104, 136).
165        ldrd            r8,  r9,  [r0, #8]
166        ldrd            r10, r11, [r2, #8]
167        smlad           r3,  r4,  r6,  r3        @ t1[0] is done
168        smlad           r12, r5,  r7,  r12       @ t1[1] is done
169        ldrd            r4,  r5,  [r0, #40]
170        ldrd            r6,  r7,  [r2, #40]
171        pkhtb           r3,  r12, r3, asr #16    @ combine t1[0] and t1[1]
        @ --- Stage 1, sums t1[2] (in r12) and t1[3] (in r14) ---
172        smlad           r12, r8,  r10, r14
173        smlad           r14, r9,  r11, r14       @ last read of the 0x8000 constant; r14 now accumulates t1[3]
174        ldrd            r8,  r9,  [r0, #72]
175        ldrd            r10, r11, [r2, #72]
176        smlad           r12, r4,  r6,  r12
177        smlad           r14, r5,  r7,  r14
178        ldrd            r4,  r5,  [r0, #104]
179        ldrd            r6,  r7,  [r2, #104]
180        smlad           r12, r8,  r10, r12
181        smlad           r14, r9,  r11, r14
182        ldrd            r8,  r9,  [r0, #136]     @ last load through r0
183        ldrd            r10, r11, [r2, #136]!    @ writeback: r2 = consts + 136 from here on
184        smlad           r12, r4,  r6,  r12
185        smlad           r14, r5,  r7,  r14
        @ --- Stage 2, outputs 0..3 (all offsets are relative to consts + 136) ---
186        ldrd            r4,  r5,  [r2, #(160 - 136 + 0)]
187        smlad           r12, r8,  r10, r12       @ t1[2] is done
188        smlad           r14, r9,  r11, r14       @ t1[3] is done
189        ldrd            r6,  r7,  [r2, #(160 - 136 + 8)]
190        smuad           r4,  r3,  r4             @ out[0] starts from the [0:1] pair (no rounding constant here)
191        smuad           r5,  r3,  r5             @ out[1] likewise
192        pkhtb           r12, r14, r12, asr #16   @ combine t1[2] and t1[3]
193                                                 @ r3  = t2[0:1]
194                                                 @ r12 = t2[2:3]
        @ Reload the two pairs spilled with str above: r0 gets the one stored
        @ last ([4:5]), r14 the one stored first ([6:7]). r0 is free because
        @ in is not read again.
195        pop             {r0, r14}                @ t2[4:5], t2[6:7]
196        ldrd            r8,  r9,  [r2, #(160 - 136 + 32)]
197        smuad           r6,  r3,  r6
198        smuad           r7,  r3,  r7
199        ldrd            r10, r11, [r2, #(160 - 136 + 40)]
200        smlad           r4,  r12, r8,  r4        @ add the [2:3] pair contribution
201        smlad           r5,  r12, r9,  r5
202        ldrd            r8,  r9,  [r2, #(160 - 136 + 64)]
203        smlad           r6,  r12, r10, r6
204        smlad           r7,  r12, r11, r7
205        ldrd            r10, r11, [r2, #(160 - 136 + 72)]
206        smlad           r4,  r0,  r8,  r4        @ add the [4:5] pair contribution
207        smlad           r5,  r0,  r9,  r5
208        ldrd            r8,  r9,  [r2, #(160 - 136 + 96)]
209        smlad           r6,  r0,  r10, r6
210        smlad           r7,  r0,  r11, r7
211        ldrd            r10, r11, [r2, #(160 - 136 + 104)]
212        smlad           r4,  r14, r8,  r4        @ add the [6:7] pair contribution: out[0] complete
213        smlad           r5,  r14, r9,  r5        @ out[1] complete
        @ --- Stage 2, outputs 4..7: same pattern, table offsets shifted by 16 ---
214        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 0)]
215        smlad           r6,  r14, r10, r6        @ out[2] complete
216        smlad           r7,  r14, r11, r7        @ out[3] complete
217        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 8)]
218        stmia           r1!, {r4, r5}            @ store out[0], out[1]; r1 advances by 8 and r4/r5 become free
219        smuad           r4,  r3,  r8
220        smuad           r5,  r3,  r9
221        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 32)]
222        stmia           r1!, {r6, r7}            @ store out[2], out[3]; r1 advances by 8 and r6/r7 become free
223        smuad           r6,  r3,  r10
224        smuad           r7,  r3,  r11
225        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 40)]
226        smlad           r4,  r12, r8,  r4
227        smlad           r5,  r12, r9,  r5
228        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 64)]
229        smlad           r6,  r12, r10, r6
230        smlad           r7,  r12, r11, r7
231        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 72)]
232        smlad           r4,  r0,  r8,  r4
233        smlad           r5,  r0,  r9,  r5
234        ldrd            r8,  r9,  [r2, #(160 - 136 + 16 + 96)]
235        smlad           r6,  r0,  r10, r6
236        smlad           r7,  r0,  r11, r7
237        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 104)]
238        smlad           r4,  r14, r8,  r4        @ out[4] complete
239        smlad           r5,  r14, r9,  r5        @ out[5] complete
240        smlad           r6,  r14, r10, r6        @ out[6] complete
241        smlad           r7,  r14, r11, r7        @ out[7] complete
242        pop             {r8-r12, r14}            @ undo the second push
243        stmia           r1!, {r4, r5, r6, r7}    @ store out[4..7]
244        pop             {r1, r3-r7, pc}          @ undo the first push (r1 gets its entry value back); loading pc returns
245endfunc
246