1 /* -*- c++ -*- */
2 /*
3 * Copyright 2018 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
23 /*!
24 * \page volk_32f_64f_multiply_64f
25 *
26 * \b Overview
27 *
28 * Multiplies two input double-precision doubleing point vectors together.
29 *
30 * c[i] = a[i] * b[i]
31 *
32 * <b>Dispatcher Prototype</b>
33 * \code
34 * void volk_32f_64f_multiply_64f(double* cVector, const double* aVector, const double* bVector, unsigned int num_points)
35 * \endcode
36 *
37 * \b Inputs
38 * \li aVector: First input vector.
39 * \li bVector: Second input vector.
40 * \li num_points: The number of values in both input vectors.
41 *
42 * \b Outputs
43 * \li cVector: The output vector.
44 *
45 * \b Example
46 * Multiply elements of an increasing vector by those of a decreasing vector.
47 * \code
48 * int N = 10;
49 * unsigned int alignment = volk_get_alignment();
50 * float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
51 * double* decreasing = (double*)volk_malloc(sizeof(double)*N, alignment);
52 * double* out = (double*)volk_malloc(sizeof(double)*N, alignment);
53 *
54 * for(unsigned int ii = 0; ii < N; ++ii){
55 * increasing[ii] = (double)ii;
56 * decreasing[ii] = 10.f - (double)ii;
57 * }
58 *
59 * volk_32f_64f_multiply_64f(out, increasing, decreasing, N);
60 *
61 * for(unsigned int ii = 0; ii < N; ++ii){
62 * printf("out[%u] = %1.2F\n", ii, out[ii]);
63 * }
64 *
65 * volk_free(increasing);
66 * volk_free(decreasing);
67 * volk_free(out);
68 * \endcode
69 */
70
71 #ifndef INCLUDED_volk_32f_64f_multiply_64f_H
72 #define INCLUDED_volk_32f_64f_multiply_64f_H
73
74 #include <inttypes.h>
75
76
77 #ifdef LV_HAVE_GENERIC
78
79 static inline void
volk_32f_64f_multiply_64f_generic(double * cVector,const float * aVector,const double * bVector,unsigned int num_points)80 volk_32f_64f_multiply_64f_generic(double *cVector, const float *aVector,
81 const double *bVector, unsigned int num_points)
82 {
83 double *cPtr = cVector;
84 const float *aPtr = aVector;
85 const double *bPtr = bVector;
86 unsigned int number = 0;
87
88 for (number = 0; number < num_points; number++) {
89 *cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
90 }
91 }
92
93 #endif /* LV_HAVE_GENERIC */
94
95 /*
96 * Unaligned versions
97 */
98
99
100 #ifdef LV_HAVE_AVX
101
102 #include <immintrin.h>
103 #include <xmmintrin.h>
104
105 static inline void
volk_32f_64f_multiply_64f_u_avx(double * cVector,const float * aVector,const double * bVector,unsigned int num_points)106 volk_32f_64f_multiply_64f_u_avx(double *cVector, const float *aVector,
107 const double *bVector, unsigned int num_points)
108 {
109 unsigned int number = 0;
110 const unsigned int eighth_points = num_points / 8;
111
112 double *cPtr = cVector;
113 const float *aPtr = aVector;
114 const double *bPtr = bVector;
115
116 __m256 aVal;
117 __m128 aVal1, aVal2;
118 __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
119 for (; number < eighth_points; number++) {
120
121 aVal = _mm256_loadu_ps(aPtr);
122 bVal1 = _mm256_loadu_pd(bPtr);
123 bVal2 = _mm256_loadu_pd(bPtr+4);
124
125 aVal1 = _mm256_extractf128_ps(aVal, 0);
126 aVal2 = _mm256_extractf128_ps(aVal, 1);
127
128 aDbl1 = _mm256_cvtps_pd(aVal1);
129 aDbl2 = _mm256_cvtps_pd(aVal2);
130
131 cVal1 = _mm256_mul_pd(aDbl1, bVal1);
132 cVal2 = _mm256_mul_pd(aDbl2, bVal2);
133
134 _mm256_storeu_pd(cPtr, cVal1); // Store the results back into the C container
135 _mm256_storeu_pd(cPtr+4, cVal2); // Store the results back into the C container
136
137 aPtr += 8;
138 bPtr += 8;
139 cPtr += 8;
140 }
141
142 number = eighth_points * 8;
143 for (; number < num_points; number++) {
144 *cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
145 }
146 }
147
148 #endif /* LV_HAVE_AVX */
149
150
151 #ifdef LV_HAVE_AVX
152
153 #include <immintrin.h>
154 #include <xmmintrin.h>
155
156 static inline void
volk_32f_64f_multiply_64f_a_avx(double * cVector,const float * aVector,const double * bVector,unsigned int num_points)157 volk_32f_64f_multiply_64f_a_avx(double *cVector, const float *aVector,
158 const double *bVector, unsigned int num_points)
159 {
160 unsigned int number = 0;
161 const unsigned int eighth_points = num_points / 8;
162
163 double *cPtr = cVector;
164 const float *aPtr = aVector;
165 const double *bPtr = bVector;
166
167 __m256 aVal;
168 __m128 aVal1, aVal2;
169 __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
170 for (; number < eighth_points; number++) {
171
172 aVal = _mm256_load_ps(aPtr);
173 bVal1 = _mm256_load_pd(bPtr);
174 bVal2 = _mm256_load_pd(bPtr+4);
175
176 aVal1 = _mm256_extractf128_ps(aVal, 0);
177 aVal2 = _mm256_extractf128_ps(aVal, 1);
178
179 aDbl1 = _mm256_cvtps_pd(aVal1);
180 aDbl2 = _mm256_cvtps_pd(aVal2);
181
182 cVal1 = _mm256_mul_pd(aDbl1, bVal1);
183 cVal2 = _mm256_mul_pd(aDbl2, bVal2);
184
185 _mm256_store_pd(cPtr, cVal1); // Store the results back into the C container
186 _mm256_store_pd(cPtr+4, cVal2); // Store the results back into the C container
187
188 aPtr += 8;
189 bPtr += 8;
190 cPtr += 8;
191 }
192
193 number = eighth_points * 8;
194 for (; number < num_points; number++) {
195 *cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
196 }
197 }
198
199 #endif /* LV_HAVE_AVX */
200
201
202
203 #endif /* INCLUDED_volk_32f_64f_multiply_64f_u_H */
204