@ -----------------------------------------------------------------------------
@ static inline void volk_32fc_x2_dot_prod_32fc_a_neonasm_opttests(
@         float* cVector, const float* aVector, const float* bVector,
@         unsigned int num_points)
@
@ Purpose
@   Accumulates sum(a[i] * b[i]) over num_points complex values, where each
@   value is two consecutive 32-bit floats (re, im), and writes the complex
@   sum to cVector[0] (re) and cVector[1] (im).
@
@ Target / syntax
@   ARM (AArch32) with NEON, GNU as syntax, '@' comments.
@
@ Inputs (as received)
@   r0 = cVector     output, 2 floats
@   r1 = aVector     num_points interleaved (re, im) pairs
@   r2 = bVector     num_points interleaved (re, im) pairs
@   r3 = num_points
@
@ Register roles inside the routine
@   sl = cVector                      r9 = num_points
@   r7 = aVector cursor for the tail  r8 = bVector cursor for the tail
@   r5 = aVector cursor (main loop); reused as &mulsc3_result in the tail
@   r6 = bVector cursor (main loop)
@   fp = num_points / 8 (number of full 8-point blocks)
@   r3 = main-loop block counter      r4 = index of the next tail point
@   q0..q7  = partial-sum accumulators (see main loop)
@   q8..q11 = de-interleaved aVector block, q12..q15 = bVector block
@   s16/s17 = running re/im total (d8 is callee-saved, so it survives the
@             calls to __mulsc3)
@
@ Stack frame (52 bytes below the saved registers)
@   [sp, #0]        5th argument word for __mulsc3
@   [sp, #8..15]    complex result written by __mulsc3
@   [sp, #16..47]   spill of the four (re, im) lane sums for the reduction
@
@ Saved / restored: r4-r9, sl, fp, lr and d8-d15.
@
@ External call
@   __mulsc3 is called with a result pointer in r0 and its four float
@   operands in r1, r2, r3 and [sp] (i.e. operands travel in core registers,
@   not VFP registers). NOTE(review): this only matches a soft-float /
@   softfp build of the runtime -- confirm against the build flags.
@ -----------------------------------------------------------------------------
.global volk_32fc_x2_dot_prod_32fc_a_neonasm_opttests
volk_32fc_x2_dot_prod_32fc_a_neonasm_opttests:
        push    {r4, r5, r6, r7, r8, r9, sl, fp, lr}
        vpush   {d8-d15}
        lsrs    fp, r3, #3              @ fp = num_points / 8; Z set when there is no full block
        sub     sp, sp, #52             @ 0x34, scratch frame described in the header
        mov     r9, r3                  @ keep the arguments in callee-saved registers,
        mov     sl, r0                  @ they must survive the __mulsc3 calls in the tail
        mov     r7, r1
        mov     r8, r2

        @ All eight accumulators must start at zero. (None of the instructions
        @ between 'lsrs' and 'beq' touches the condition flags.)
        vmov.i32 q0, #0
        vmov.i32 q1, #0
        vmov.i32 q2, #0
        vmov.i32 q3, #0
        vmov.i32 q4, #0
        vmov.i32 q5, #0
        vmov.i32 q6, #0
        vmov.i32 q7, #0
        beq     .smallvector            @ fewer than 8 points: skip the vector loop

        mov     r5, r1                  @ r5 = aVector load cursor
        mov     r6, r2                  @ r6 = bVector load cursor
        mov     r3, #0                  @ r3 = blocks processed so far

        @ Each iteration consumes 8 complex points (64 bytes) from each input.
        @ vld4.32 splits every group of four consecutive floats across four
        @ registers, so after both loads:
        @   q8  / q12 = re of points 0,2,4,6     q9  / q13 = im of points 0,2,4,6
        @   q10 / q14 = re of points 1,3,5,7     q11 / q15 = im of points 1,3,5,7
        @ Accumulators:
        @   q6, q7 += a.re * b.re        q5, q4 -= a.im * b.im     (real part)
        @   q0, q1 += a.im * b.re        q2, q3 += a.re * b.im     (imaginary part)
.mainloop:
        vld4.32 {d24,d26,d28,d30}, [r6]!
        vld4.32 {d16,d18,d20,d22}, [r5]!
        add     r3, r3, #1
        vld4.32 {d25,d27,d29,d31}, [r6]!
        vld4.32 {d17,d19,d21,d23}, [r5]!
        vmla.f32 q6, q8, q12
        vmla.f32 q0, q9, q12
        cmp     r3, fp                  @ flags are consumed by 'bne' below; NEON ops leave them alone
        vmls.f32 q5, q13, q9
        vmla.f32 q2, q13, q8
        vmla.f32 q7, q10, q14
        vmla.f32 q1, q11, q14
        vmls.f32 q4, q15, q11
        vmla.f32 q3, q15, q10
        bne     .mainloop

        lsl     r3, fp, #6              @ bytes consumed by the loop = blocks * 64
        add     r8, r8, r3              @ advance the tail cursors past the
        add     r7, r7, r3              @ data the vector loop already handled

.smallvector:
        @ Fold the eight accumulators into q8 = real lanes, q9 = imaginary lanes.
        vadd.f32 q3, q2, q3
        vadd.f32 q4, q5, q4
        vadd.f32 q6, q6, q7
        vadd.f32 q1, q0, q1
        vadd.f32 q8, q6, q4
        vadd.f32 q9, q1, q3

        @ Spill as interleaved (re, im) pairs and sum the four lanes with
        @ scalar adds. The addition order (lane 0, 1, 3, 2) is deliberate:
        @ it is kept fixed so the rounding of the result does not change.
        add     r3, sp, #16
        vst2.32 {d16-d19}, [r3 :64]
        vldr    s15, [sp, #24]          @ re lane 1
        vldr    s16, [sp, #16]          @ re lane 0
        vldr    s17, [sp, #20]          @ im lane 0
        vadd.f32 s16, s16, s15
        vldr    s11, [sp, #28]          @ im lane 1
        vldr    s12, [sp, #40]          @ 0x28, re lane 3
        vldr    s13, [sp, #44]          @ 0x2c, im lane 3
        vldr    s14, [sp, #32]          @ re lane 2
        vldr    s15, [sp, #36]          @ 0x24, im lane 2
        vadd.f32 s17, s17, s11
        vadd.f32 s16, s16, s12
        vadd.f32 s17, s17, s13
        vadd.f32 s16, s16, s14
        vadd.f32 s17, s17, s15
        vstr    s16, [sl]               @ cVector[0] = real part so far
        vstr    s17, [sl, #4]           @ cVector[1] = imaginary part so far

        lsl     r4, fp, #3              @ r4 = number of points already processed
        cmp     r9, r4
        bls     .epilog                 @ num_points <= r4: no leftover points

        add     r5, sp, #8              @ r5 = address of the __mulsc3 result slot

        @ Leftover points (num_points % 8), one complex multiply per pass.
.tailcase:
        ldr     r3, [r7], #8            @ r3   = a.re, advance a cursor by one point
        mov     r0, r5                  @ r0   = where __mulsc3 writes its result
        ldr     r1, [r8], #8            @ r1   = b.re, advance b cursor by one point
        add     r4, r4, #1
        ldr     ip, [r7, #-4]           @ ip   = a.im
        ldr     r2, [r8, #-4]           @ r2   = b.im
        str     ip, [sp]                @ [sp] = a.im (5th argument word)
        bl      __mulsc3                @ result = (b.re + i*b.im) * (a.re + i*a.im)
        vldr    s14, [sp, #8]           @ product.re
        vldr    s15, [sp, #12]          @ product.im
        vadd.f32 s16, s16, s14
        cmp     r4, r9
        vadd.f32 s17, s17, s15
        vstr    s16, [sl]               @ result is rewritten after every point
        vstr    s17, [sl, #4]
        bne     .tailcase

.epilog:
        add     sp, sp, #52             @ 0x34
        vpop    {d8-d15}
        pop     {r4, r5, r6, r7, r8, r9, sl, fp, pc}