1 /* -*- c++ -*- */
2 /*
3  * Copyright 2018 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING.  If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32f_64f_multiply_64f
25  *
26  * \b Overview
27  *
28  * Multiplies two input double-precision doubleing point vectors together.
29  *
30  * c[i] = a[i] * b[i]
31  *
32  * <b>Dispatcher Prototype</b>
33  * \code
34  * void volk_32f_64f_multiply_64f(double* cVector, const double* aVector, const double* bVector, unsigned int num_points)
35  * \endcode
36  *
37  * \b Inputs
38  * \li aVector: First input vector.
39  * \li bVector: Second input vector.
40  * \li num_points: The number of values in both input vectors.
41  *
42  * \b Outputs
43  * \li cVector: The output vector.
44  *
45  * \b Example
46  * Multiply elements of an increasing vector by those of a decreasing vector.
47  * \code
48  *   int N = 10;
49  *   unsigned int alignment = volk_get_alignment();
50  *   float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
51  *   double* decreasing = (double*)volk_malloc(sizeof(double)*N, alignment);
52  *   double* out = (double*)volk_malloc(sizeof(double)*N, alignment);
53  *
54  *   for(unsigned int ii = 0; ii < N; ++ii){
55  *       increasing[ii] = (double)ii;
56  *       decreasing[ii] = 10.f - (double)ii;
57  *   }
58  *
59  *   volk_32f_64f_multiply_64f(out, increasing, decreasing, N);
60  *
61  *   for(unsigned int ii = 0; ii < N; ++ii){
62  *       printf("out[%u] = %1.2F\n", ii, out[ii]);
63  *   }
64  *
65  *   volk_free(increasing);
66  *   volk_free(decreasing);
67  *   volk_free(out);
68  * \endcode
69  */
70 
71 #ifndef INCLUDED_volk_32f_64f_multiply_64f_H
72 #define INCLUDED_volk_32f_64f_multiply_64f_H
73 
74 #include <inttypes.h>
75 
76 
77 #ifdef LV_HAVE_GENERIC
78 
79 static inline void
volk_32f_64f_multiply_64f_generic(double * cVector,const float * aVector,const double * bVector,unsigned int num_points)80 volk_32f_64f_multiply_64f_generic(double *cVector, const float *aVector,
81                                  const double *bVector, unsigned int num_points)
82 {
83   double *cPtr = cVector;
84   const float *aPtr = aVector;
85   const double *bPtr = bVector;
86   unsigned int number = 0;
87 
88   for (number = 0; number < num_points; number++) {
89     *cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
90   }
91 }
92 
93 #endif /* LV_HAVE_GENERIC */
94 
95 /*
96  * Unaligned versions
97  */
98 
99 
100 #ifdef LV_HAVE_AVX
101 
102 #include <immintrin.h>
103 #include <xmmintrin.h>
104 
105 static inline void
volk_32f_64f_multiply_64f_u_avx(double * cVector,const float * aVector,const double * bVector,unsigned int num_points)106 volk_32f_64f_multiply_64f_u_avx(double *cVector, const float *aVector,
107                                const double *bVector, unsigned int num_points)
108 {
109   unsigned int number = 0;
110   const unsigned int eighth_points = num_points / 8;
111 
112   double *cPtr = cVector;
113   const float *aPtr = aVector;
114   const double *bPtr = bVector;
115 
116   __m256 aVal;
117   __m128 aVal1, aVal2;
118   __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
119   for (; number < eighth_points; number++) {
120 
121     aVal = _mm256_loadu_ps(aPtr);
122     bVal1 = _mm256_loadu_pd(bPtr);
123     bVal2 = _mm256_loadu_pd(bPtr+4);
124 
125     aVal1 = _mm256_extractf128_ps(aVal, 0);
126     aVal2 = _mm256_extractf128_ps(aVal, 1);
127 
128     aDbl1 = _mm256_cvtps_pd(aVal1);
129     aDbl2 = _mm256_cvtps_pd(aVal2);
130 
131     cVal1 = _mm256_mul_pd(aDbl1, bVal1);
132     cVal2 = _mm256_mul_pd(aDbl2, bVal2);
133 
134     _mm256_storeu_pd(cPtr, cVal1); // Store the results back into the C container
135     _mm256_storeu_pd(cPtr+4, cVal2); // Store the results back into the C container
136 
137     aPtr += 8;
138     bPtr += 8;
139     cPtr += 8;
140   }
141 
142   number = eighth_points * 8;
143   for (; number < num_points; number++) {
144     *cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
145   }
146 }
147 
148 #endif /* LV_HAVE_AVX */
149 
150 
151 #ifdef LV_HAVE_AVX
152 
153 #include <immintrin.h>
154 #include <xmmintrin.h>
155 
156 static inline void
volk_32f_64f_multiply_64f_a_avx(double * cVector,const float * aVector,const double * bVector,unsigned int num_points)157 volk_32f_64f_multiply_64f_a_avx(double *cVector, const float *aVector,
158                                 const double *bVector, unsigned int num_points)
159 {
160   unsigned int number = 0;
161   const unsigned int eighth_points = num_points / 8;
162 
163   double *cPtr = cVector;
164   const float *aPtr = aVector;
165   const double *bPtr = bVector;
166 
167   __m256 aVal;
168   __m128 aVal1, aVal2;
169   __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
170   for (; number < eighth_points; number++) {
171 
172     aVal = _mm256_load_ps(aPtr);
173     bVal1 = _mm256_load_pd(bPtr);
174     bVal2 = _mm256_load_pd(bPtr+4);
175 
176     aVal1 = _mm256_extractf128_ps(aVal, 0);
177     aVal2 = _mm256_extractf128_ps(aVal, 1);
178 
179     aDbl1 = _mm256_cvtps_pd(aVal1);
180     aDbl2 = _mm256_cvtps_pd(aVal2);
181 
182     cVal1 = _mm256_mul_pd(aDbl1, bVal1);
183     cVal2 = _mm256_mul_pd(aDbl2, bVal2);
184 
185     _mm256_store_pd(cPtr, cVal1); // Store the results back into the C container
186     _mm256_store_pd(cPtr+4, cVal2); // Store the results back into the C container
187 
188     aPtr += 8;
189     bPtr += 8;
190     cPtr += 8;
191   }
192 
193   number = eighth_points * 8;
194   for (; number < num_points; number++) {
195     *cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
196   }
197 }
198 
199 #endif /* LV_HAVE_AVX */
200 
201 
202 
203 #endif /* INCLUDED_volk_32f_64f_multiply_64f_u_H */
204