/*
 * Bluetooth low-complexity, subband codec (SBC)
 *
 * Copyright (C) 2017  Aurelien Jacobs <aurel@gnuage.org>
 * Copyright (C) 2008-2010  Nokia Corporation
 * Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
 * Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
 * Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * SBC ARMv6 optimizations. The instructions are scheduled for ARM11 pipeline.
 *
 * Both routines below follow the same two-stage shape:
 *
 *   stage 1: every 32-bit word of the input is multiplied halfword-wise with
 *            the matching word of the consts table (smlad: two signed 16x16
 *            products added to an accumulator). Each accumulator starts at
 *            0x8000 and only its upper 16 bits are kept, so the implicit
 *            shift right by 16 rounds instead of truncating. Two such
 *            results are packed into one register with pkhtb.
 *   stage 2: the packed stage 1 results are multiplied (smuad/smlad) with
 *            the second table that follows the first one in consts; the
 *            32-bit sums are stored to out without any further shift.
 *
 * Loads are deliberately interleaved with the multiply-accumulates; do not
 * reorder instructions without re-checking the schedule.
 */

#include "libavutil/arm/asm.S"

/*
 * ff_sbc_analyze_4_armv6
 *
 * In:   r0 = in     - 80 bytes are read (offsets 0..79), used as packed
 *                     signed 16-bit values
 *       r2 = consts - 112 bytes are read: offsets 0..79 pair up with in,
 *                     offsets 80..111 are the stage 2 (cos) table
 * Out:  r1 = out    - four 32-bit words are written (offsets 0..15)
 *
 * Stage 1 produces t1[0..3]; t1[j] accumulates the products of input word
 * j with consts word j, taken from each of the five 16-byte groups
 * (byte offset 4*j + 16*k, k = 0..4).
 * Stage 2 computes, for i = 0..3:
 *     out[i] = t1[0:1] . cos[80 + 4*i] + t1[2:3] . cos[96 + 4*i]
 * where "." is the two-element 16-bit dot product and the numbers are byte
 * offsets into consts.
 *
 * All registers used as scratch (and r1) are pushed on entry and popped on
 * exit; the function returns by popping the saved lr into pc.
 * NOTE(review): lr is stored by both pushes and r1/r3/r12 are saved as well;
 * this looks redundant under AAPCS but is left untouched - confirm before
 * trimming.
 * NOTE(review): ldrd is used on in and consts, so callers presumably have to
 * pass suitably aligned buffers - confirm the requirement for the target.
 */
function ff_sbc_analyze_4_armv6, export=1
        @ r0 = in, r1 = out, r2 = consts
        push            {r1, r3-r7, lr}
        push            {r8-r12, r14}
        ldrd            r4, r5, [r0, #0]
        ldrd            r6, r7, [r2, #0]
        ldrd            r8, r9, [r0, #16]
        ldrd            r10, r11, [r2, #16]
        mov             r14, #0x8000            @ rounding bias for the later take-upper-16-bits
        smlad           r3, r4, r6, r14         @ r3  = t1[0] accumulator
        smlad           r12, r5, r7, r14        @ r12 = t1[1] accumulator
        ldrd            r4, r5, [r0, #32]
        ldrd            r6, r7, [r2, #32]
        smlad           r3, r8, r10, r3
        smlad           r12, r9, r11, r12
        ldrd            r8, r9, [r0, #48]
        ldrd            r10, r11, [r2, #48]
        smlad           r3, r4, r6, r3
        smlad           r12, r5, r7, r12
        ldrd            r4, r5, [r0, #64]
        ldrd            r6, r7, [r2, #64]
        smlad           r3, r8, r10, r3
        smlad           r12, r9, r11, r12
        ldrd            r8, r9, [r0, #8]        @ first operands for t1[2], t1[3]
        ldrd            r10, r11, [r2, #8]
        smlad           r3, r4, r6, r3          @ t1[0] is done
        smlad           r12, r5, r7, r12        @ t1[1] is done
        ldrd            r4, r5, [r0, #24]
        ldrd            r6, r7, [r2, #24]
        pkhtb           r3, r12, r3, asr #16    @ combine t1[0] and t1[1]
        smlad           r12, r8, r10, r14       @ r12 = t1[2] accumulator
        smlad           r14, r9, r11, r14       @ r14 = t1[3] accumulator (bias register is consumed here)
        ldrd            r8, r9, [r0, #40]
        ldrd            r10, r11, [r2, #40]
        smlad           r12, r4, r6, r12
        smlad           r14, r5, r7, r14
        ldrd            r4, r5, [r0, #56]
        ldrd            r6, r7, [r2, #56]
        smlad           r12, r8, r10, r12
        smlad           r14, r9, r11, r14
        ldrd            r8, r9, [r0, #72]
        ldrd            r10, r11, [r2, #72]
        smlad           r12, r4, r6, r12
        smlad           r14, r5, r7, r14
        ldrd            r4, r5, [r2, #80]       @ start loading cos table
        smlad           r12, r8, r10, r12       @ t1[2] is done
        smlad           r14, r9, r11, r14       @ t1[3] is done
        ldrd            r6, r7, [r2, #88]
        ldrd            r8, r9, [r2, #96]
        ldrd            r10, r11, [r2, #104]    @ cos table fully loaded
        pkhtb           r12, r14, r12, asr #16  @ combine t1[2] and t1[3]
        smuad           r4, r3, r4              @ out[0] = t1[0:1] . cos[80]
        smuad           r5, r3, r5              @ out[1] = t1[0:1] . cos[84]
        smlad           r4, r12, r8, r4         @ out[0] += t1[2:3] . cos[96]
        smlad           r5, r12, r9, r5         @ out[1] += t1[2:3] . cos[100]
        smuad           r6, r3, r6              @ out[2] = t1[0:1] . cos[88]
        smuad           r7, r3, r7              @ out[3] = t1[0:1] . cos[92]
        smlad           r6, r12, r10, r6        @ out[2] += t1[2:3] . cos[104]
        smlad           r7, r12, r11, r7        @ out[3] += t1[2:3] . cos[108]
        pop             {r8-r12, r14}
        stmia           r1, {r4, r5, r6, r7}    @ store out[0..3]; r1 was never modified
        pop             {r1, r3-r7, pc}
endfunc

/*
 * ff_sbc_analyze_8_armv6
 *
 * In:   r0 = in     - 160 bytes are read (offsets 0..159), used as packed
 *                     signed 16-bit values
 *       r2 = consts - 288 bytes are read: offsets 0..159 pair up with in,
 *                     offsets 160..287 are the stage 2 (cos) table
 * Out:  r1 = out    - eight 32-bit words are written (offsets 0..31)
 *
 * Stage 1 produces t1[0..7]; t1[j] accumulates the products of input word
 * j with consts word j, taken from each of the five 32-byte groups
 * (byte offset 4*j + 32*k, k = 0..4). The pairs are computed in the order
 * t1[6:7], t1[4:5], t1[0:1], t1[2:3]; the first two packed pairs are parked
 * on the stack (8 bytes) because no register is free for them.
 * Stage 2 computes, for i = 0..7:
 *     out[i] = sum over p = 0..3 of t1[2p:2p+1] . cos[160 + 32*p + 4*i]
 * where "." is the two-element 16-bit dot product and the numbers are byte
 * offsets into consts. out[0..3] are produced first, then out[4..7].
 *
 * r2 is advanced by 136 with a pre-indexed writeback load so that every
 * cos table offset, written as (160 - 136 + n), stays inside the 0..255
 * immediate range of ldrd.
 *
 * All registers used as scratch (and r1) are pushed on entry and popped on
 * exit; the function returns by popping the saved lr into pc. r0 and r2 are
 * modified and not restored.
 * NOTE(review): lr is stored by both pushes and r1/r3/r12 are saved as well;
 * this looks redundant under AAPCS but is left untouched - confirm before
 * trimming.
 * NOTE(review): ldrd is used on in and consts, so callers presumably have to
 * pass suitably aligned buffers - confirm the requirement for the target.
 */
function ff_sbc_analyze_8_armv6, export=1
        @ r0 = in, r1 = out, r2 = consts
        push            {r1, r3-r7, lr}
        push            {r8-r12, r14}
        ldrd            r4, r5, [r0, #24]
        ldrd            r6, r7, [r2, #24]
        ldrd            r8, r9, [r0, #56]
        ldrd            r10, r11, [r2, #56]
        mov             r14, #0x8000            @ rounding bias for the later take-upper-16-bits
        smlad           r3, r4, r6, r14         @ r3  = t1[6] accumulator
        smlad           r12, r5, r7, r14        @ r12 = t1[7] accumulator
        ldrd            r4, r5, [r0, #88]
        ldrd            r6, r7, [r2, #88]
        smlad           r3, r8, r10, r3
        smlad           r12, r9, r11, r12
        ldrd            r8, r9, [r0, #120]
        ldrd            r10, r11, [r2, #120]
        smlad           r3, r4, r6, r3
        smlad           r12, r5, r7, r12
        ldrd            r4, r5, [r0, #152]
        ldrd            r6, r7, [r2, #152]
        smlad           r3, r8, r10, r3
        smlad           r12, r9, r11, r12
        ldrd            r8, r9, [r0, #16]       @ first operands for t1[4], t1[5]
        ldrd            r10, r11, [r2, #16]
        smlad           r3, r4, r6, r3          @ t1[6] is done
        smlad           r12, r5, r7, r12        @ t1[7] is done
        ldrd            r4, r5, [r0, #48]
        ldrd            r6, r7, [r2, #48]
        pkhtb           r3, r12, r3, asr #16    @ combine t1[6] and t1[7]
        str             r3, [sp, #-4]!          @ save to stack
        smlad           r3, r8, r10, r14        @ r3  = t1[4] accumulator
        smlad           r12, r9, r11, r14       @ r12 = t1[5] accumulator
        ldrd            r8, r9, [r0, #80]
        ldrd            r10, r11, [r2, #80]
        smlad           r3, r4, r6, r3
        smlad           r12, r5, r7, r12
        ldrd            r4, r5, [r0, #112]
        ldrd            r6, r7, [r2, #112]
        smlad           r3, r8, r10, r3
        smlad           r12, r9, r11, r12
        ldrd            r8, r9, [r0, #144]
        ldrd            r10, r11, [r2, #144]
        smlad           r3, r4, r6, r3
        smlad           r12, r5, r7, r12
        ldrd            r4, r5, [r0, #0]        @ first operands for t1[0], t1[1]
        ldrd            r6, r7, [r2, #0]
        smlad           r3, r8, r10, r3         @ t1[4] is done
        smlad           r12, r9, r11, r12       @ t1[5] is done
        ldrd            r8, r9, [r0, #32]
        ldrd            r10, r11, [r2, #32]
        pkhtb           r3, r12, r3, asr #16    @ combine t1[4] and t1[5]
        str             r3, [sp, #-4]!          @ save to stack
        smlad           r3, r4, r6, r14         @ r3  = t1[0] accumulator
        smlad           r12, r5, r7, r14        @ r12 = t1[1] accumulator
        ldrd            r4, r5, [r0, #64]
        ldrd            r6, r7, [r2, #64]
        smlad           r3, r8, r10, r3
        smlad           r12, r9, r11, r12
        ldrd            r8, r9, [r0, #96]
        ldrd            r10, r11, [r2, #96]
        smlad           r3, r4, r6, r3
        smlad           r12, r5, r7, r12
        ldrd            r4, r5, [r0, #128]
        ldrd            r6, r7, [r2, #128]
        smlad           r3, r8, r10, r3
        smlad           r12, r9, r11, r12
        ldrd            r8, r9, [r0, #8]        @ first operands for t1[2], t1[3]
        ldrd            r10, r11, [r2, #8]
        smlad           r3, r4, r6, r3          @ t1[0] is done
        smlad           r12, r5, r7, r12        @ t1[1] is done
        ldrd            r4, r5, [r0, #40]
        ldrd            r6, r7, [r2, #40]
        pkhtb           r3, r12, r3, asr #16    @ combine t1[0] and t1[1]
        smlad           r12, r8, r10, r14       @ r12 = t1[2] accumulator
        smlad           r14, r9, r11, r14       @ r14 = t1[3] accumulator (bias register is consumed here)
        ldrd            r8, r9, [r0, #72]
        ldrd            r10, r11, [r2, #72]
        smlad           r12, r4, r6, r12
        smlad           r14, r5, r7, r14
        ldrd            r4, r5, [r0, #104]
        ldrd            r6, r7, [r2, #104]
        smlad           r12, r8, r10, r12
        smlad           r14, r9, r11, r14
        ldrd            r8, r9, [r0, #136]      @ last read from in
        ldrd            r10, r11, [r2, #136]!   @ writeback: r2 = consts + 136 from here on
        smlad           r12, r4, r6, r12
        smlad           r14, r5, r7, r14
        ldrd            r4, r5, [r2, #(160 - 136 + 0)]  @ start loading cos table (consts + 160)
        smlad           r12, r8, r10, r12       @ t1[2] is done
        smlad           r14, r9, r11, r14       @ t1[3] is done
        ldrd            r6, r7, [r2, #(160 - 136 + 8)]
        smuad           r4, r3, r4              @ out[0] = t1[0:1] . cos row 0
        smuad           r5, r3, r5              @ out[1]
        pkhtb           r12, r14, r12, asr #16  @ combine t1[2] and t1[3]
        @ r3   = t2[0:1]
        @ r12  = t2[2:3]
        pop             {r0, r14}               @ t2[4:5], t2[6:7]  (r0 is reused; in is not needed any more)
        ldrd            r8, r9, [r2, #(160 - 136 + 32)]
        smuad           r6, r3, r6              @ out[2]
        smuad           r7, r3, r7              @ out[3]
        ldrd            r10, r11, [r2, #(160 - 136 + 40)]
        smlad           r4, r12, r8, r4         @ add the t2[2:3] terms (cos row 1)
        smlad           r5, r12, r9, r5
        ldrd            r8, r9, [r2, #(160 - 136 + 64)]
        smlad           r6, r12, r10, r6
        smlad           r7, r12, r11, r7
        ldrd            r10, r11, [r2, #(160 - 136 + 72)]
        smlad           r4, r0, r8, r4          @ add the t2[4:5] terms (cos row 2)
        smlad           r5, r0, r9, r5
        ldrd            r8, r9, [r2, #(160 - 136 + 96)]
        smlad           r6, r0, r10, r6
        smlad           r7, r0, r11, r7
        ldrd            r10, r11, [r2, #(160 - 136 + 104)]
        smlad           r4, r14, r8, r4         @ add the t2[6:7] terms (cos row 3)
        smlad           r5, r14, r9, r5
        ldrd            r8, r9, [r2, #(160 - 136 + 16 + 0)]     @ second half: columns for out[4..7]
        smlad           r6, r14, r10, r6
        smlad           r7, r14, r11, r7
        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 8)]
        stmia           r1!, {r4, r5}           @ store out[0], out[1]
        smuad           r4, r3, r8              @ out[4] = t2[0:1] . cos row 0
        smuad           r5, r3, r9              @ out[5]
        ldrd            r8, r9, [r2, #(160 - 136 + 16 + 32)]
        stmia           r1!, {r6, r7}           @ store out[2], out[3]
        smuad           r6, r3, r10             @ out[6]
        smuad           r7, r3, r11             @ out[7]
        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 40)]
        smlad           r4, r12, r8, r4         @ add the t2[2:3] terms (cos row 1)
        smlad           r5, r12, r9, r5
        ldrd            r8, r9, [r2, #(160 - 136 + 16 + 64)]
        smlad           r6, r12, r10, r6
        smlad           r7, r12, r11, r7
        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 72)]
        smlad           r4, r0, r8, r4          @ add the t2[4:5] terms (cos row 2)
        smlad           r5, r0, r9, r5
        ldrd            r8, r9, [r2, #(160 - 136 + 16 + 96)]
        smlad           r6, r0, r10, r6
        smlad           r7, r0, r11, r7
        ldrd            r10, r11, [r2, #(160 - 136 + 16 + 104)]
        smlad           r4, r14, r8, r4         @ add the t2[6:7] terms (cos row 3)
        smlad           r5, r14, r9, r5
        smlad           r6, r14, r10, r6
        smlad           r7, r14, r11, r7
        pop             {r8-r12, r14}
        stmia           r1!, {r4, r5, r6, r7}   @ store out[4..7]
        pop             {r1, r3-r7, pc}         @ also restores the r1 advanced by stmia
endfunc