@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.
@//

@//
@// File Name:  armSP_FFT_CToC_SC32_Radix2_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   5638
@// Last Modified Date:       Wed, 06 Jun 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 2 DIT in-order out-of-place FFT stage for an N point complex signal.
@// This handles the general stage, not the first or last stage.
@//
@// File layout:
@//   - register aliases (C preprocessor defines),
@//   - the FFTSTAGE macro, which holds the whole stage body,
@//   - four entry points that expand FFTSTAGE with different
@//     scaled / inverse settings.
@//
@// Syntax: GNU as for ARM with NEON, passed through the C preprocessor.
@// Comments are written as "@//" throughout this file.
@//


@// Include standard headers

#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"


@// Import symbols required from other files
@// (For example tables)



@// Set debugging level
@//DEBUG_ON    SETL {TRUE}



@// Guarding implementation by the processor name
@// (no conditional directive follows this note in this file)



@// Register aliases. Do not append a comment to a #define line: the
@// preprocessor would strip the "//" part and leave "@" inside the expansion.


@//Input Registers
@//   pSrc       : source buffer pointer
@//   pTwiddle   : twiddle table pointer
@//   pDst       : destination buffer pointer
@//   subFFTNum  : halved on entry to FFTSTAGE and then used as grpSize
@//   subFFTSize : doubled by FFTSTAGE for the next stage

#define pSrc            r0
#define pDst            r2
#define pTwiddle        r1
#define subFFTNum       r6
#define subFFTSize      r7


@//Output Registers
@// (no separate aliases: FFTSTAGE leaves its results in the input registers,
@//  see the end of the macro)


@//Local Scratch Registers
@//   outPointStep : byte offset between the two stores of one butterfly
@//   pointStep    : byte offset between the two loads of one butterfly
@//   grpCount     : group loop counter
@//   setCount     : set loop counter
@//   step         : 16 - pointStep, post-increment of the second load
@//   dstStep      : 16 - outPointStep, post-increment of the second store
@//   pTable, pTmp : both name r9; only pTmp is used in this file

#define outPointStep    r3
#define pointStep       r4
#define grpCount        r5
#define setCount        r8
@//const           RN  9
#define step            r10
#define dstStep         r11
#define pTable          r9
#define pTmp            r9

@// Neon Registers
@//   dW          : one twiddle factor, two 32-bit lanes
@//   dX0, dX1    : first input of the butterfly (real lanes, imaginary lanes)
@//   dX2, dX3    : second input of the butterfly, later reused for the
@//                 narrowed twiddle product
@//   dY0 .. dY3  : butterfly outputs
@//   qT0, qT1    : 64-bit product accumulators
@// Q3 is D6:D7 and Q4 is D8:D9, so dY0..dY3 occupy the same registers as
@// qT0 and qT1. FFTSTAGE only writes dY0..dY3 after both accumulators have
@// been narrowed into dX2 and dX3.

#define dW              D0.S32
#define dX0             D2.S32
#define dX1             D3.S32
#define dX2             D4.S32
#define dX3             D5.S32
#define dY0             D6.S32
#define dY1             D7.S32
#define dY2             D8.S32
#define dY3             D9.S32
#define qT0             Q3.S64
#define qT1             Q4.S64



@//
@// FFTSTAGE scaled, inverse, name
@//
@// Body of one general radix-2 stage.
@//
@// Macro arguments:
@//   scaled  : "TRUE" selects the halving butterfly (VHSUB/VHADD); any other
@//             string selects the plain butterfly (VSUB/VADD).
@//   inverse : "TRUE" multiplies the second input by the conjugate of the
@//             loaded twiddle; any other string multiplies by the twiddle
@//             as loaded.
@//   name    : suffix appended to the two loop labels so that every
@//             expansion in this file gets distinct labels.
@//
@// Registers read on entry : pSrc, pDst, pTwiddle, subFFTNum, subFFTSize.
@// Registers on exit       : pSrc and pDst are exchanged and rewound (see the
@//                           end of the macro), pTwiddle is rewound,
@//                           subFFTNum is halved, subFFTSize is doubled.
@// Registers overwritten   : r3, r4, r5, r8, r9, r10, r11, D0, D2-D9, flags.
@//
@// NOTE(review): whether these registers are preserved for the caller is
@// decided by M_START/M_END and by the callers, neither of which is visible
@// in this file.
@// NOTE(review): every pass of the set loop handles two butterflies and the
@// counter is decremented by two, so grpSize is presumably even whenever
@// this macro runs - confirm against the caller.
@//

        .MACRO FFTSTAGE scaled, inverse, name

        @// Define stack arguments
        @// (none are defined in this file)


        @// Derive grpSize and grpCount right away so that the input
        @// registers can be reused: grpSize lives in subFFTNum from here on.

        LSR     subFFTNum,subFFTNum,#1                      @//grpSize
        LSL     grpCount,subFFTSize,#1


        @// pointStep = 4*grpSize at this point. It is doubled to 8*grpSize
        @// further down, after it has been used as a multiplicand.
        MOV     pointStep,subFFTNum,LSL #2

        @// update subFFTSize for the next stage
        MOV     subFFTSize,grpCount

        @// outPointStep = grpCount * (4*grpSize).
        @// SMULBB multiplies the signed low halfwords only, so this equals
        @// 4 * subFFTSize * subFFTNum (entry values) as long as both
        @// operands fit in a signed 16-bit value. The original notes call
        @// this quantity 4*size bytes.
        SMULBB  outPointStep,grpCount,pointStep
        LSL     pointStep,pointStep,#1


        @// Each VLD2/VST2 below moves 16 bytes. The second access of every
        @// pair post-increments by 16 minus the stride of the first one, so
        @// that a pair advances its pointer by 16 bytes in total.
        RSB      step,pointStep,#16
        RSB      dstStep,outPointStep,#16

        @// Loop on the groups
        @// One twiddle factor is loaded per group; pTwiddle then advances by
        @// pointStep bytes.

grpLoop\name :
        MOV      setCount,pointStep,LSR #3                  @// setCount = grpSize
        VLD1     dW,[pTwiddle],pointStep                @//[wi | wr]


        @// Loop on the sets
        @// Each pass handles two butterflies (two 32-bit lanes per register).


setLoop\name :


        @// VLD2 de-interleaves: even elements go to the first register,
        @// odd elements to the second one.
        VLD2    {dX0,dX1},[pSrc],pointStep            @// point0: dX0-real part dX1-img part
        VLD2    {dX2,dX3},[pSrc],step                 @// point1: dX2-real part dX3-img part

        @// The flags set here are consumed by the BGT at the end of the
        @// set loop; no instruction in between is a flag-setting one.
        SUBS    setCount,setCount,#2

        .ifeqs  "\inverse", "TRUE"
            @// point1 * conj(W), with dW[0] used as the real part and
            @// dW[1] as the imaginary part of W
            VMULL   qT0,dX2,dW[0]
            VMLAL   qT0,dX3,dW[1]                       @// real part
            VMULL   qT1,dX3,dW[0]
            VMLSL   qT1,dX2,dW[1]                       @// imag part

        .else

            @// point1 * W, with dW[0] used as the real part and
            @// dW[1] as the imaginary part of W
            VMULL   qT0,dX2,dW[0]
            VMLSL   qT0,dX3,dW[1]                       @// real part
            VMULL   qT1,dX3,dW[0]
            VMLAL   qT1,dX2,dW[1]                       @// imag part

        .endif

        @// Rounding narrow of the 64-bit products by 31 bits, i.e. the
        @// twiddle is treated as having 31 fractional bits. The results
        @// replace point1 in dX2/dX3.
        VRSHRN  dX2,qT0,#31
        VRSHRN  dX3,qT1,#31

        @// Butterfly: dY0/dY1 = point0 - product, dY2/dY3 = point0 + product.
        @// The scaled variant uses the halving forms of the same operations.
        .ifeqs "\scaled", "TRUE"
            VHSUB    dY0,dX0,dX2
            VHSUB    dY1,dX1,dX3
            VHADD    dY2,dX0,dX2
            VHADD    dY3,dX1,dX3

        .else
            VSUB    dY0,dX0,dX2
            VSUB    dY1,dX1,dX3
            VADD    dY2,dX0,dX2
            VADD    dY3,dX1,dX3

        .endif

        @// The difference is stored at pDst, the sum outPointStep bytes
        @// further on; VST2 re-interleaves real and imaginary lanes.
        VST2    {dY0,dY1},[pDst],outPointStep
        VST2    {dY2,dY3},[pDst],dstStep              @// dstStep =  -outPointStep + 16

        BGT     setLoop\name

        @// The set loop advanced pSrc by 16 bytes per pass; adding pointStep
        @// here moves pSrc past the inputs that were read through the
        @// strided first load. ADD leaves the flags from SUBS untouched.
        SUBS    grpCount,grpCount,#2
        ADD     pSrc,pSrc,pointStep
        BGT     grpLoop\name


        @// Reset and Swap pSrc and pDst for the next stage
        @// r9 (pTmp) carries the old pDst across the exchange.
        MOV     pTmp,pDst
        SUB     pDst,pSrc,outPointStep,LSL #1       @// pDst -= 4*size; pSrc -= 8*size bytes
        SUB     pSrc,pTmp,outPointStep

        @// Reset pTwiddle for the next stage
        SUB     pTwiddle,pTwiddle,outPointStep      @// pTwiddle -= 4*size bytes


        .endm



@// Entry points. M_START and M_END are not defined in this file.
@// NOTE(review): they presumably come from dl/api/armCOMM_s.h and emit the
@// function prologue and epilogue, with the r4 argument selecting the saved
@// registers - confirm in that header.

        @// Forward transform, plain butterfly
        M_START armSP_FFTFwd_CToC_SC32_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","FALSE",FWD
        M_END



        @// Inverse transform, plain butterfly
        M_START armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","TRUE",INV
        M_END



        @// Forward transform, halving butterfly
        M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","FALSE",FWDSFS
        M_END



        @// Inverse transform, halving butterfly
        M_START armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","TRUE",INVSFS
        M_END

        .end