@ ---------------------------------------------------------------------------
@ static inline void volk_32fc_x2_dot_prod_32fc_a_neonasm_opttests(
@         float* cVector, const float* aVector, const float* bVector,
@         unsigned int num_points)
@
@ Complex dot product without conjugation:
@     cVector[0] + i*cVector[1] = sum over k of a[k] * b[k]
@ where aVector / bVector hold num_points interleaved (re, im) float pairs.
@
@ Syntax: GNU as, ARM (32-bit) with NEON; '@' starts a comment.
@ In:     r0 = cVector, r1 = aVector, r2 = bVector, r3 = num_points
@ Out:    [r0] = real part, [r0, #4] = imaginary part
@ Saved:  r4-r9, sl, fp, lr and d8-d15 are pushed on entry, popped on exit.
@
@ Stack frame (52 bytes below the saved registers):
@     [sp, #0]        fourth (stacked) argument for __mulsc3
@     [sp, #8..15]    complex result written by __mulsc3
@     [sp, #16..47]   four interleaved (re, im) partial sums from the NEON loop
@
@ Register roles:
@     sl      = cVector                 r9 = num_points
@     fp      = num_points / 8          (main loop trip count)
@     r5, r6  = a / b read pointers in the main loop (post-incremented)
@     r7, r8  = a / b read pointers for the scalar tail
@     r3      = main loop counter       r4 = tail element index
@     q6, q5, q7, q4 = real-part accumulators
@     q0, q2, q1, q3 = imaginary-part accumulators
@     s16, s17       = running scalar (re, im) total; live across __mulsc3
@
@ NOTE(review): the tail calls __mulsc3 with the result pointer in r0 and the
@ float operands in r1-r3 / [sp]; this matches a soft-float calling
@ convention -- confirm against the toolchain ABI in use.
@ ---------------------------------------------------------------------------
.global	volk_32fc_x2_dot_prod_32fc_a_neonasm_opttests
volk_32fc_x2_dot_prod_32fc_a_neonasm_opttests:
    push    {r4, r5, r6, r7, r8, r9, sl, fp, lr}
    vpush   {d8-d15}
    lsrs    fp, r3, #3              @ fp = num_points / 8; Z flag consumed by beq below
    sub     sp, sp, #52     @ 0x34
    mov     r9, r3                  @ r9 = num_points
    mov     sl, r0                  @ sl = cVector
    mov     r7, r1                  @ r7 = aVector (tail pointer)
    mov     r8, r2                  @ r8 = bVector (tail pointer)

    @ Clear every accumulator explicitly; none of these instructions touch
    @ the condition flags set by lsrs above.
    vmov.i32        q0, #0
    vmov.i32        q1, #0
    vmov.i32        q2, #0
    vmov.i32        q3, #0
    vmov.i32        q4, #0
    vmov.i32        q5, #0
    vmov.i32        q6, #0
    vmov.i32        q7, #0
    beq     .smallvector            @ fewer than 8 points: skip the NEON loop

    mov     r5, r1                  @ r5 = a read pointer for the main loop
    mov     r6, r2                  @ r6 = b read pointer for the main loop
    mov     r3, #0                  @ r3 = groups of 8 processed so far
.mainloop:
    @ Each pair of vld4.32 loads consumes 64 bytes (8 complex points) and
    @ de-interleaves them:
    @     q12 / q13 = b re / im of even-indexed points
    @     q14 / q15 = b re / im of odd-indexed points
    @     q8  / q9  = a re / im of even-indexed points
    @     q10 / q11 = a re / im of odd-indexed points
    vld4.32 {d24,d26,d28,d30}, [r6]!
    vld4.32 {d16,d18,d20,d22}, [r5]!
    add     r3, r3, #1
    vld4.32 {d25,d27,d29,d31}, [r6]!
    vld4.32 {d17,d19,d21,d23}, [r5]!
    vmla.f32        q6, q8, q12     @ re += a.re * b.re   (even points)
    vmla.f32        q0, q9, q12     @ im += a.im * b.re
    cmp     r3, fp
    vmls.f32        q5, q13, q9     @ re -= b.im * a.im
    vmla.f32        q2, q13, q8     @ im += b.im * a.re
    vmla.f32        q7, q10, q14    @ re += a.re * b.re   (odd points)
    vmla.f32        q1, q11, q14    @ im += a.im * b.re
    vmls.f32        q4, q15, q11    @ re -= b.im * a.im
    vmla.f32        q3, q15, q10    @ im += b.im * a.re
    bne     .mainloop

    lsl     r3, fp, #6              @ bytes consumed by the loop = groups * 64
    add     r8, r8, r3              @ advance the tail pointers past them
    add     r7, r7, r3
.smallvector:
    @ Fold the eight accumulators into q8 (real) and q9 (imaginary), spill
    @ them interleaved to the stack, then sum the four lanes in scalar code.
    vadd.f32        q3, q2, q3
    add     r3, sp, #16
    lsl     r4, fp, #3              @ r4 = number of points already handled
    vadd.f32        q4, q5, q4
    cmp     r9, r4                  @ flags consumed by bls below; the VFP/NEON
                                    @ instructions in between leave them alone
    vadd.f32        q6, q6, q7
    vadd.f32        q1, q0, q1
    vadd.f32        q8, q6, q4      @ q8 = real partial sums
    vadd.f32        q9, q1, q3      @ q9 = imaginary partial sums
    vst2.32 {d16-d19}, [r3 :64]     @ [sp,#16..47] = re0,im0,re1,im1,re2,im2,re3,im3
    vldr    s15, [sp, #24]
    vldr    s16, [sp, #16]
    vldr    s17, [sp, #20]
    vadd.f32        s16, s16, s15
    vldr    s11, [sp, #28]
    vldr    s12, [sp, #40]  @ 0x28
    vldr    s13, [sp, #44]  @ 0x2c
    vldr    s14, [sp, #32]
    vldr    s15, [sp, #36]  @ 0x24
    vadd.f32        s17, s17, s11
    vadd.f32        s16, s16, s12
    vadd.f32        s17, s17, s13
    vadd.f32        s16, s16, s14
    vadd.f32        s17, s17, s15
    vstr    s16, [sl]               @ cVector real part
    vstr    s17, [sl, #4]           @ cVector imaginary part
    bls     .epilog                 @ num_points <= handled: no tail elements
    add     r5, sp, #8              @ r5 = address of the __mulsc3 result slot
.tailcase:
    @ One leftover complex point per iteration, multiplied via __mulsc3:
    @     r0 = result slot, r1 = b.re, r2 = b.im, r3 = a.re, [sp] = a.im
    ldr     r3, [r7], #8
    mov     r0, r5
    ldr     r1, [r8], #8
    add     r4, r4, #1
    ldr     ip, [r7, #-4]
    ldr     r2, [r8, #-4]
    str     ip, [sp]
    bl      __mulsc3
    vldr    s14, [sp, #8]           @ product real part
    vldr    s15, [sp, #12]          @ product imaginary part
    vadd.f32        s16, s16, s14
    cmp     r4, r9
    vadd.f32        s17, s17, s15
    vstr    s16, [sl]
    vstr    s17, [sl, #4]
    bne     .tailcase
.epilog:
    add     sp, sp, #52     @ 0x34
    vpop    {d8-d15}
    pop     {r4, r5, r6, r7, r8, r9, sl, fp, pc}
97