1 //
2 // Copyright 2012-2013 Ettus Research LLC
3 // Copyright 2018 Ettus Research, a National Instruments Company
4 //
5 // SPDX-License-Identifier: GPL-3.0-or-later
6 //
7
8 #include "convert_common.hpp"
9 #include <uhd/utils/byteswap.hpp>
10 #include <emmintrin.h>
11
12 using namespace uhd::convert;
13
14 template <const int shuf>
pack_sc32_4x(const __m128 & in0,const __m128 & in1,const __m128 & in2,const __m128 & in3,const __m128 & scalar)15 UHD_INLINE __m128i pack_sc32_4x(const __m128& in0,
16 const __m128& in1,
17 const __m128& in2,
18 const __m128& in3,
19 const __m128& scalar)
20 {
21 __m128i tmpi0 = _mm_cvtps_epi32(_mm_mul_ps(in0, scalar));
22 tmpi0 = _mm_shuffle_epi32(tmpi0, shuf);
23 __m128i tmpi1 = _mm_cvtps_epi32(_mm_mul_ps(in1, scalar));
24 tmpi1 = _mm_shuffle_epi32(tmpi1, shuf);
25 const __m128i lo = _mm_packs_epi32(tmpi0, tmpi1);
26
27 __m128i tmpi2 = _mm_cvtps_epi32(_mm_mul_ps(in2, scalar));
28 tmpi2 = _mm_shuffle_epi32(tmpi2, shuf);
29 __m128i tmpi3 = _mm_cvtps_epi32(_mm_mul_ps(in3, scalar));
30 tmpi3 = _mm_shuffle_epi32(tmpi3, shuf);
31 const __m128i hi = _mm_packs_epi32(tmpi2, tmpi3);
32
33 return _mm_packs_epi16(lo, hi);
34 }
35
36 DECLARE_CONVERTER(fc32, 1, sc8_item32_be, 1, PRIORITY_SIMD)
37 {
38 const fc32_t* input = reinterpret_cast<const fc32_t*>(inputs[0]);
39 item32_t* output = reinterpret_cast<item32_t*>(outputs[0]);
40
41 const __m128 scalar = _mm_set_ps1(float(scale_factor));
42 const int shuf = _MM_SHUFFLE(3, 2, 1, 0);
43
44 #define convert_fc32_1_to_sc8_item32_1_bswap_guts(_al_) \
45 for (size_t j = 0; i + 7 < nsamps; i += 8, j += 4) { \
46 /* load from input */ \
47 __m128 tmp0 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 0)); \
48 __m128 tmp1 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 2)); \
49 __m128 tmp2 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 4)); \
50 __m128 tmp3 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 6)); \
51 \
52 /* convert */ \
53 const __m128i tmpi = pack_sc32_4x<shuf>(tmp0, tmp1, tmp2, tmp3, scalar); \
54 \
55 /* store to output */ \
56 _mm_storeu_si128(reinterpret_cast<__m128i*>(output + j), tmpi); \
57 }
58
59 size_t i = 0;
60
61 // dispatch according to alignment
62 if ((size_t(input) & 0xf) == 0) {
63 convert_fc32_1_to_sc8_item32_1_bswap_guts(_)
64 } else {
65 convert_fc32_1_to_sc8_item32_1_bswap_guts(u_)
66 }
67
68 // convert remainder
69 xx_to_item32_sc8<uhd::htonx>(input + i, output + (i / 2), nsamps - i, scale_factor);
70 }
71
72 DECLARE_CONVERTER(fc32, 1, sc8_item32_le, 1, PRIORITY_SIMD)
73 {
74 const fc32_t* input = reinterpret_cast<const fc32_t*>(inputs[0]);
75 item32_t* output = reinterpret_cast<item32_t*>(outputs[0]);
76
77 const __m128 scalar = _mm_set_ps1(float(scale_factor));
78 const int shuf = _MM_SHUFFLE(0, 1, 2, 3);
79
80 #define convert_fc32_1_to_sc8_item32_1_nswap_guts(_al_) \
81 for (size_t j = 0; i + 7 < nsamps; i += 8, j += 4) { \
82 /* load from input */ \
83 __m128 tmp0 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 0)); \
84 __m128 tmp1 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 2)); \
85 __m128 tmp2 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 4)); \
86 __m128 tmp3 = _mm_load##_al_##ps(reinterpret_cast<const float*>(input + i + 6)); \
87 \
88 /* convert */ \
89 const __m128i tmpi = pack_sc32_4x<shuf>(tmp0, tmp1, tmp2, tmp3, scalar); \
90 \
91 /* store to output */ \
92 _mm_storeu_si128(reinterpret_cast<__m128i*>(output + j), tmpi); \
93 }
94
95 size_t i = 0;
96
97 // dispatch according to alignment
98 if ((size_t(input) & 0xf) == 0) {
99 convert_fc32_1_to_sc8_item32_1_nswap_guts(_)
100 } else {
101 convert_fc32_1_to_sc8_item32_1_nswap_guts(u_)
102 }
103
104 // convert remainder
105 xx_to_item32_sc8<uhd::htowx>(input + i, output + (i / 2), nsamps - i, scale_factor);
106 }
107