@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.

@//
@//
@// File Name:  armSP_FFT_CToC_SC16_Radix2_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   5892
@// Last Modified Date:       Thu, 07 Jun 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a Radix 2 FFT stage for a N point complex signal
@//
@//


@// Include standard headers

#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"


@// Import symbols required from other files
@// (For example tables)
@// (none in this file)


@// Set debugging level
@//DEBUG_ON    SETL {TRUE}


@// Guarding implementation by the processor name


@// ---------------------------------------------------------------------
@// Register map
@// ---------------------------------------------------------------------

@// Input registers (must be loaded before FFTSTAGE is expanded)

#define pSrc            r0      /* source buffer                          */
#define pTwiddle        r1      /* twiddle table                          */
#define pDst            r2      /* destination buffer                     */
#define subFFTNum       r6      /* halved by the stage                    */
#define subFFTSize      r7      /* doubled by the stage                   */


@// Output registers
@// (none besides the updated input registers)


@// Local scratch registers (core)

#define outStride       r3      /* bytes between the two stores of a set  */
#define inStride        r4      /* bytes between point0 and point1 loads  */
#define grpLeft         r5      /* group counter, decremented by 2        */
#define setLeft         r8      /* set counter, decremented by 4          */
#define pSwap           r9      /* holds pDst during the pointer swap     */
#define inRewind        r10     /* 16 - inStride                          */
#define outRewind       r11     /* 16 - outStride                         */

@// NEON registers
@// qPr overlays dDr:dDi and qPi overlays dSr:dSi. The products are
@// narrowed into dBr/dBi before dDr..dSi are written, so the overlap
@// is harmless.

#define dTw             D0.S16  /* twiddle; lanes 0 and 1 are used        */
#define dAr             D2.S16  /* point0 real parts                      */
#define dAi             D3.S16  /* point0 imaginary parts                 */
#define dBr             D4.S16  /* point1 real parts, then product real   */
#define dBi             D5.S16  /* point1 imag parts, then product imag   */
#define dDr             D6.S16  /* point0 - product, real                 */
#define dDi             D7.S16  /* point0 - product, imaginary            */
#define dSr             D8.S16  /* point0 + product, real                 */
#define dSi             D9.S16  /* point0 + product, imaginary            */
#define qPr             Q3.S32  /* widened product, real                  */
#define qPi             Q4.S32  /* widened product, imaginary             */


@// ---------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@// Emits one out-of-place radix-2 stage on 16-bit complex data.
@//
@//   scaled  : "TRUE" selects the halving VHADD/VHSUB butterflies,
@//             anything else selects plain VADD/VSUB.
@//   inverse : "TRUE" multiplies point1 by the conjugate of the twiddle,
@//             anything else multiplies by the twiddle itself.
@//   name    : suffix appended to the two loop labels so that every
@//             expansion gets its own labels.
@//
@// On exit: subFFTNum is halved, subFFTSize is doubled, pSrc and pDst
@// are exchanged (each rewound to the start of its buffer) and pTwiddle
@// is rewound, ready for the next stage.
@//
@// Writes r3-r5, r8-r11, the flags and D0, D2-D9.
@// NOTE(review): M_START below is given only r4; presumably the callers
@// of these "unsafe" routines preserve the remaining registers. Confirm
@// against armCOMM_s.h and the calling FFT drivers.
@// ---------------------------------------------------------------------

        .MACRO FFTSTAGE scaled, inverse, name

        @// Define stack arguments
        @// (none)

        @// Derive this stage's sizes up front so that the subFFTNum and
        @// subFFTSize registers already hold the values for the next stage.

        LSR     subFFTNum,subFFTNum,#1              @// grpSize = subFFTNum / 2
        LSL     grpLeft,subFFTSize,#1               @// twice the incoming subFFTSize
        MOV     subFFTSize,grpLeft                  @// subFFTSize for the next stage

        @// outStride = grpLeft * (2 * grpSize); SMULBB multiplies the
        @// low halfwords of both operands.
        LSL     inStride,subFFTNum,#1
        SMULBB  outStride,grpLeft,inStride
        RSB     outRewind,outStride,#16             @// outRewind = 16 - outStride

        @// inStride = 4 * grpSize bytes: the distance from point0 to
        @// point1 of the same butterfly.
        LSL     inStride,inStride,#1
        RSB     inRewind,inStride,#16               @// inRewind = 16 - inStride

        @// ---- Loop over the groups: one twiddle load per group ----

radix2Grp\name:

        VLD1    dTw,[pTwiddle],inStride             @//[wi | wr]
        LSR     setLeft,inStride,#2                 @// sets in this group

        @// ---- Loop over the sets: 4 butterflies per pass ----

radix2Set\name:

        @// Each VLD2 reads 16 bytes and de-interleaves them into a
        @// real and an imaginary register. The second load steps pSrc
        @// back so that the pair advances it by 16 bytes in total.
        VLD2    {dAr,dAi},[pSrc],inStride           @// point0
        VLD2    {dBr,dBi},[pSrc],inRewind           @// point1

        @// Flags set here are consumed by the BGT that closes the set
        @// loop; no instruction in between writes the flags.
        SUBS    setLeft,setLeft,#4

        @// Complex multiply of point1 by the twiddle. The lane-0
        @// products are shared by both directions; only the sign of
        @// the lane-1 terms differs.
        VMULL   qPr,dBr,dTw[0]
        VMULL   qPi,dBi,dTw[0]

        .ifeqs  "\inverse", "TRUE"
        VMLAL   qPr,dBi,dTw[1]                      @// real part
        VMLSL   qPi,dBr,dTw[1]                      @// imag part

        .ELSE

        VMLSL   qPr,dBi,dTw[1]                      @// real part
        VMLAL   qPi,dBr,dTw[1]                      @// imag part

        .ENDIF

        @// Round and narrow the 32-bit products back to 16 bits.
        VRSHRN  dBr,qPr,#15
        VRSHRN  dBi,qPi,#15

        @// Butterfly: difference into dDr:dDi, sum into dSr:dSi.
        .ifeqs  "\scaled", "TRUE"
        VHSUB   dDr,dAr,dBr
        VHSUB   dDi,dAi,dBi
        VHADD   dSr,dAr,dBr
        VHADD   dSi,dAi,dBi

        .ELSE
        VSUB    dDr,dAr,dBr
        VSUB    dDi,dAi,dBi
        VADD    dSr,dAr,dBr
        VADD    dSi,dAi,dBi

        .ENDIF

        @// The difference goes to pDst and the sum to pDst + outStride;
        @// the second store leaves pDst 16 bytes past where it started.
        VST2    {dDr,dDi},[pDst],outStride
        VST2    {dSr,dSi},[pDst],outRewind          @// outRewind = -outStride + 16

        BGT     radix2Set\name

        @// grpLeft holds twice the group count, hence the step of 2.
        SUBS    grpLeft,grpLeft,#2
        ADD     pSrc,pSrc,inStride                  @// skip the point1 half just consumed
        BGT     radix2Grp\name

        @// ---- Prepare the pointers for the next stage ----

        @// Rewind the twiddle pointer.
        SUB     pTwiddle,pTwiddle,outStride         @// pTwiddle -= 2*size bytes

        @// Exchange pSrc and pDst, rewinding each to its buffer start.
        MOV     pSwap,pDst
        SUB     pDst,pSrc,outStride,LSL #1          @// new pDst = pSrc - 4*size bytes
        SUB     pSrc,pSwap,outStride                @// new pSrc = old pDst - 2*size bytes

        .endm


@// ---------------------------------------------------------------------
@// Entry points: one per (scaled, inverse) combination
@// ---------------------------------------------------------------------

        @// Forward transform, unscaled
        M_START armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","FALSE",FWD
        M_END


        @// Inverse transform, unscaled
        M_START armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "FALSE","TRUE",INV
        M_END


        @// Forward transform, halving butterflies
        M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","FALSE",FWDSFS
        M_END


        @// Inverse transform, halving butterflies
        M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe,r4
        FFTSTAGE "TRUE","TRUE",INVSFS
        M_END


        .END