/* Copyright (c) 2017, 2019 Evan Nemerson <evan@nemerson.com>
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23
24 #include "test/test.h"
25 #define SIMDE_TESTS_CURRENT_ISAX sse2
26 #include <simde/x86/sse2.h>
27 #include <test/x86/test-sse2.h>
28
29 #if defined(HEDLEY_MSVC_VERSION)
30 # pragma warning(disable:4324)
31 #endif
32
33 static int
test_simde_x_mm_abs_pd(SIMDE_MUNIT_TEST_ARGS)34 test_simde_x_mm_abs_pd (SIMDE_MUNIT_TEST_ARGS) {
35 static const struct {
36 const simde_float64 a[2];
37 const simde_float64 r[2];
38 } test_vec[] = {
39 { { SIMDE_FLOAT64_C( 147.28), SIMDE_FLOAT64_C( 704.65) },
40 { SIMDE_FLOAT64_C( 147.28), SIMDE_FLOAT64_C( 704.65) } },
41 { { SIMDE_FLOAT64_C( 136.85), SIMDE_FLOAT64_C( -756.74) },
42 { SIMDE_FLOAT64_C( 136.85), SIMDE_FLOAT64_C( 756.74) } },
43 { { SIMDE_FLOAT64_C( 178.63), SIMDE_FLOAT64_C( -900.20) },
44 { SIMDE_FLOAT64_C( 178.63), SIMDE_FLOAT64_C( 900.20) } },
45 { { SIMDE_FLOAT64_C( -651.54), SIMDE_FLOAT64_C( -517.72) },
46 { SIMDE_FLOAT64_C( 651.54), SIMDE_FLOAT64_C( 517.72) } },
47 { { SIMDE_FLOAT64_C( 75.39), SIMDE_FLOAT64_C( -705.91) },
48 { SIMDE_FLOAT64_C( 75.39), SIMDE_FLOAT64_C( 705.91) } },
49 { { SIMDE_FLOAT64_C( -738.47), SIMDE_FLOAT64_C( -668.92) },
50 { SIMDE_FLOAT64_C( 738.47), SIMDE_FLOAT64_C( 668.92) } },
51 { { SIMDE_FLOAT64_C( 212.72), SIMDE_FLOAT64_C( -499.79) },
52 { SIMDE_FLOAT64_C( 212.72), SIMDE_FLOAT64_C( 499.79) } },
53 { { SIMDE_FLOAT64_C( 481.67), SIMDE_FLOAT64_C( 233.48) },
54 { SIMDE_FLOAT64_C( 481.67), SIMDE_FLOAT64_C( 233.48) } }
55 };
56
57 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
58 simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
59 simde__m128d r = simde_x_mm_abs_pd(a);
60 simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1);
61 }
62
63 return 0;
64 }
65
66 static int
test_simde_mm_add_epi8(SIMDE_MUNIT_TEST_ARGS)67 test_simde_mm_add_epi8 (SIMDE_MUNIT_TEST_ARGS) {
68 struct {
69 int8_t a[16];
70 int8_t b[16];
71 int8_t r[16];
72 } test_vec[] = {
73 { { INT8_C( 90), INT8_C( 118), -INT8_C( 35), -INT8_C( 66), INT8_C( 97), INT8_C( 96), INT8_C( 20), -INT8_C( 105),
74 INT8_C( 78), -INT8_C( 32), INT8_C( 110), -INT8_C( 33), -INT8_C( 12), INT8_C( 9), INT8_C( 119), -INT8_C( 73) },
75 { -INT8_C( 117), -INT8_C( 121), INT8_C( 108), -INT8_C( 124), -INT8_C( 90), INT8_C( 100), -INT8_C( 121), -INT8_C( 115),
76 INT8_C( 60), INT8_C( 124), INT8_C( 32), -INT8_C( 86), INT8_C( 118), -INT8_C( 20), -INT8_C( 123), -INT8_C( 48) },
77 { -INT8_C( 27), -INT8_C( 3), INT8_C( 73), INT8_C( 66), INT8_C( 7), -INT8_C( 60), -INT8_C( 101), INT8_C( 36),
78 -INT8_C( 118), INT8_C( 92), -INT8_C( 114), -INT8_C( 119), INT8_C( 106), -INT8_C( 11), -INT8_C( 4), -INT8_C( 121) } },
79 { { INT8_C( 99), INT8_C( 98), -INT8_C( 113), -INT8_C( 60), -INT8_C( 62), -INT8_C( 93), INT8_C( 92), INT8_C( 16),
80 -INT8_C( 125), -INT8_C( 54), -INT8_C( 16), INT8_C( 119), -INT8_C( 45), INT8_C( 103), INT8_C( 47), INT8_C( 95) },
81 { -INT8_C( 18), -INT8_C( 101), -INT8_C( 29), -INT8_C( 108), -INT8_C( 1), INT8_C( 106), INT8_C( 34), INT8_C( 59),
82 -INT8_C( 26), INT8_C( 66), -INT8_C( 27), INT8_C( 92), INT8_C( 47), INT8_C( 107), INT8_C( 44), -INT8_C( 110) },
83 { INT8_C( 81), -INT8_C( 3), INT8_C( 114), INT8_C( 88), -INT8_C( 63), INT8_C( 13), INT8_C( 126), INT8_C( 75),
84 INT8_C( 105), INT8_C( 12), -INT8_C( 43), -INT8_C( 45), INT8_C( 2), -INT8_C( 46), INT8_C( 91), -INT8_C( 15) } },
85 { { -INT8_C( 51), -INT8_C( 69), INT8_C( 86), -INT8_C( 112), INT8_C( 94), -INT8_C( 78), -INT8_C( 96), -INT8_C( 31),
86 INT8_C( 125), -INT8_C( 112), INT8_C( 89), INT8_C( 80), -INT8_C( 9), -INT8_C( 120), -INT8_C( 81), -INT8_C( 27) },
87 { INT8_C( 35), -INT8_C( 110), INT8_C( 122), INT8_C( 34), -INT8_C( 4), -INT8_C( 100), INT8_C( 94), -INT8_C( 30),
88 -INT8_C( 34), INT8_C( 67), INT8_C( 62), INT8_C( 13), -INT8_C( 82), INT8_C( 107), -INT8_C( 97), INT8_C( 124) },
89 { -INT8_C( 16), INT8_C( 77), -INT8_C( 48), -INT8_C( 78), INT8_C( 90), INT8_C( 78), -INT8_C( 2), -INT8_C( 61),
90 INT8_C( 91), -INT8_C( 45), -INT8_C( 105), INT8_C( 93), -INT8_C( 91), -INT8_C( 13), INT8_C( 78), INT8_C( 97) } },
91 { { INT8_C( 38), -INT8_C( 10), INT8_C( 12), -INT8_C( 123), -INT8_C( 88), -INT8_C( 84), INT8_C( 102), INT8_C( 37),
92 INT8_C( 61), -INT8_C( 65), INT8_C( 118), INT8_C( 52), INT8_C( 71), INT8_C( 37), INT8_C( 26), INT8_C( 106) },
93 { -INT8_C( 72), -INT8_C( 108), -INT8_C( 115), -INT8_C( 76), INT8_C( 48), -INT8_C( 21), -INT8_C( 105), INT8_C( 14),
94 INT8_C( 46), -INT8_C( 43), INT8_C( 28), -INT8_C( 35), INT8_C( 64), -INT8_C( 69), INT8_C( 89), INT8_C( 103) },
95 { -INT8_C( 34), -INT8_C( 118), -INT8_C( 103), INT8_C( 57), -INT8_C( 40), -INT8_C( 105), -INT8_C( 3), INT8_C( 51),
96 INT8_C( 107), -INT8_C( 108), -INT8_C( 110), INT8_C( 17), -INT8_C( 121), -INT8_C( 32), INT8_C( 115), -INT8_C( 47) } },
97 { { -INT8_C( 79), INT8_C( 101), -INT8_C( 20), INT8_C( 90), INT8_C( 17), INT8_C( 82), INT8_MAX, INT8_C( 78),
98 INT8_C( 18), -INT8_C( 11), -INT8_C( 125), INT8_C( 89), INT8_C( 27), -INT8_C( 99), -INT8_C( 60), -INT8_C( 45) },
99 { INT8_C( 49), INT8_C( 81), -INT8_C( 121), INT8_C( 97), INT8_C( 60), INT8_C( 30), INT8_C( 111), INT8_C( 106),
100 -INT8_C( 12), -INT8_C( 117), INT8_C( 71), INT8_C( 52), INT8_C( 71), -INT8_C( 96), -INT8_C( 101), -INT8_C( 8) },
101 { -INT8_C( 30), -INT8_C( 74), INT8_C( 115), -INT8_C( 69), INT8_C( 77), INT8_C( 112), -INT8_C( 18), -INT8_C( 72),
102 INT8_C( 6), INT8_MIN, -INT8_C( 54), -INT8_C( 115), INT8_C( 98), INT8_C( 61), INT8_C( 95), -INT8_C( 53) } },
103 { { INT8_C( 5), -INT8_C( 121), INT8_C( 82), INT8_C( 23), -INT8_C( 38), -INT8_C( 46), INT8_C( 101), -INT8_C( 20),
104 -INT8_C( 57), -INT8_C( 24), INT8_C( 69), -INT8_C( 30), -INT8_C( 123), INT8_C( 9), -INT8_C( 75), -INT8_C( 74) },
105 { INT8_C( 90), INT8_C( 61), INT8_C( 23), -INT8_C( 106), INT8_C( 91), -INT8_C( 121), INT8_C( 1), INT8_C( 79),
106 INT8_C( 18), INT8_C( 72), -INT8_C( 124), INT8_C( 89), -INT8_C( 23), INT8_C( 31), INT8_C( 82), -INT8_C( 18) },
107 { INT8_C( 95), -INT8_C( 60), INT8_C( 105), -INT8_C( 83), INT8_C( 53), INT8_C( 89), INT8_C( 102), INT8_C( 59),
108 -INT8_C( 39), INT8_C( 48), -INT8_C( 55), INT8_C( 59), INT8_C( 110), INT8_C( 40), INT8_C( 7), -INT8_C( 92) } },
109 { { -INT8_C( 89), -INT8_C( 92), INT8_C( 5), -INT8_C( 127), INT8_C( 118), INT8_C( 107), INT8_C( 109), INT8_C( 62),
110 INT8_C( 83), -INT8_C( 78), INT8_C( 32), -INT8_C( 39), -INT8_C( 68), -INT8_C( 42), -INT8_C( 113), INT8_C( 22) },
111 { INT8_C( 19), -INT8_C( 89), -INT8_C( 83), INT8_C( 110), INT8_C( 46), -INT8_C( 82), -INT8_C( 66), INT8_C( 64),
112 -INT8_C( 10), INT8_C( 66), -INT8_C( 102), -INT8_C( 33), INT8_C( 97), -INT8_C( 20), -INT8_C( 50), INT8_C( 8) },
113 { -INT8_C( 70), INT8_C( 75), -INT8_C( 78), -INT8_C( 17), -INT8_C( 92), INT8_C( 25), INT8_C( 43), INT8_C( 126),
114 INT8_C( 73), -INT8_C( 12), -INT8_C( 70), -INT8_C( 72), INT8_C( 29), -INT8_C( 62), INT8_C( 93), INT8_C( 30) } },
115 { { -INT8_C( 112), -INT8_C( 45), -INT8_C( 119), INT8_C( 7), INT8_C( 62), -INT8_C( 10), INT8_C( 69), -INT8_C( 110),
116 -INT8_C( 87), INT8_C( 101), INT8_C( 107), INT8_C( 101), INT8_C( 59), -INT8_C( 6), INT8_C( 123), INT8_C( 78) },
117 { -INT8_C( 95), INT8_C( 40), -INT8_C( 67), -INT8_C( 49), -INT8_C( 42), INT8_C( 123), INT8_C( 16), -INT8_C( 51),
118 -INT8_C( 67), -INT8_C( 86), -INT8_C( 84), INT8_C( 30), -INT8_C( 106), INT8_C( 122), INT8_C( 39), INT8_C( 38) },
119 { INT8_C( 49), -INT8_C( 5), INT8_C( 70), -INT8_C( 42), INT8_C( 20), INT8_C( 113), INT8_C( 85), INT8_C( 95),
120 INT8_C( 102), INT8_C( 15), INT8_C( 23), -INT8_C( 125), -INT8_C( 47), INT8_C( 116), -INT8_C( 94), INT8_C( 116) } }
121 };
122
123 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
124 simde__m128i a = simde_x_mm_loadu_epi8(test_vec[i].a);
125 simde__m128i b = simde_x_mm_loadu_epi8(test_vec[i].b);
126 simde__m128i r = simde_mm_add_epi8(a, b);
127 simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r));
128 }
129
130 return 0;
131 }
132
133 static int
test_simde_mm_add_epi16(SIMDE_MUNIT_TEST_ARGS)134 test_simde_mm_add_epi16 (SIMDE_MUNIT_TEST_ARGS) {
135 struct {
136 int16_t a[8];
137 int16_t b[8];
138 int16_t r[8];
139 } test_vec[] = {
140 { { -INT16_C( 4111), -INT16_C( 19283), INT16_C( 32346), INT16_C( 31529), INT16_C( 28909), -INT16_C( 11812), INT16_C( 20575), INT16_C( 29075) },
141 { -INT16_C( 31999), INT16_C( 11862), INT16_C( 4324), -INT16_C( 23595), INT16_C( 24767), -INT16_C( 10354), -INT16_C( 11824), -INT16_C( 16113) },
142 { INT16_C( 29426), -INT16_C( 7421), -INT16_C( 28866), INT16_C( 7934), -INT16_C( 11860), -INT16_C( 22166), INT16_C( 8751), INT16_C( 12962) } },
143 { { -INT16_C( 17215), INT16_C( 7029), -INT16_C( 24774), INT16_C( 10134), INT16_C( 29199), INT16_C( 28409), -INT16_C( 29502), -INT16_C( 15137) },
144 { INT16_C( 13584), -INT16_C( 2830), -INT16_C( 14522), INT16_C( 1431), INT16_C( 9512), -INT16_C( 1828), -INT16_C( 5129), -INT16_C( 18247) },
145 { -INT16_C( 3631), INT16_C( 4199), INT16_C( 26240), INT16_C( 11565), -INT16_C( 26825), INT16_C( 26581), INT16_C( 30905), INT16_C( 32152) } },
146 { { INT16_C( 11944), -INT16_C( 7469), INT16_C( 27085), -INT16_C( 9206), INT16_C( 987), -INT16_C( 25013), INT16_C( 10895), -INT16_C( 24734) },
147 { INT16_C( 21600), -INT16_C( 22892), INT16_C( 11036), INT16_C( 17579), -INT16_C( 30895), INT16_C( 18492), -INT16_C( 2701), INT16_C( 6912) },
148 { -INT16_C( 31992), -INT16_C( 30361), -INT16_C( 27415), INT16_C( 8373), -INT16_C( 29908), -INT16_C( 6521), INT16_C( 8194), -INT16_C( 17822) } },
149 { { -INT16_C( 11485), -INT16_C( 3587), INT16_C( 1852), INT16_C( 6093), INT16_C( 6154), -INT16_C( 25931), INT16_C( 5955), -INT16_C( 23751) },
150 { -INT16_C( 12948), -INT16_C( 30647), -INT16_C( 2823), INT16_C( 19148), INT16_C( 2171), -INT16_C( 4462), -INT16_C( 27907), INT16_C( 8201) },
151 { -INT16_C( 24433), INT16_C( 31302), -INT16_C( 971), INT16_C( 25241), INT16_C( 8325), -INT16_C( 30393), -INT16_C( 21952), -INT16_C( 15550) } },
152 { { INT16_C( 1893), -INT16_C( 24303), -INT16_C( 8434), INT16_C( 6584), INT16_C( 28407), INT16_C( 15027), -INT16_C( 4987), -INT16_C( 3619) },
153 { INT16_C( 9914), -INT16_C( 19591), INT16_C( 17690), -INT16_C( 26883), -INT16_C( 28851), INT16_C( 19076), -INT16_C( 29151), -INT16_C( 31125) },
154 { INT16_C( 11807), INT16_C( 21642), INT16_C( 9256), -INT16_C( 20299), -INT16_C( 444), -INT16_C( 31433), INT16_C( 31398), INT16_C( 30792) } },
155 { { INT16_C( 31893), -INT16_C( 23769), -INT16_C( 8357), INT16_C( 21436), INT16_C( 28493), -INT16_C( 11379), INT16_C( 27484), INT16_C( 5828) },
156 { INT16_C( 16017), -INT16_C( 21303), -INT16_C( 14717), -INT16_C( 11966), -INT16_C( 14763), INT16_C( 30235), -INT16_C( 31148), -INT16_C( 5636) },
157 { -INT16_C( 17626), INT16_C( 20464), -INT16_C( 23074), INT16_C( 9470), INT16_C( 13730), INT16_C( 18856), -INT16_C( 3664), INT16_C( 192) } },
158 { { INT16_C( 8963), INT16_C( 24205), INT16_C( 18690), INT16_C( 20657), INT16_C( 16313), INT16_C( 5411), -INT16_C( 6230), INT16_C( 15147) },
159 { -INT16_C( 3035), -INT16_C( 22041), INT16_C( 10682), INT16_C( 3962), -INT16_C( 27152), INT16_C( 17541), -INT16_C( 32484), INT16_C( 7982) },
160 { INT16_C( 5928), INT16_C( 2164), INT16_C( 29372), INT16_C( 24619), -INT16_C( 10839), INT16_C( 22952), INT16_C( 26822), INT16_C( 23129) } },
161 { { -INT16_C( 17500), -INT16_C( 22915), INT16_C( 12036), -INT16_C( 16906), INT16_C( 6510), INT16_C( 6354), -INT16_C( 767), INT16_C( 9811) },
162 { INT16_C( 15345), -INT16_C( 21553), INT16_C( 18788), INT16_C( 21690), INT16_C( 16351), -INT16_C( 1127), -INT16_C( 14400), INT16_C( 25626) },
163 { -INT16_C( 2155), INT16_C( 21068), INT16_C( 30824), INT16_C( 4784), INT16_C( 22861), INT16_C( 5227), -INT16_C( 15167), -INT16_C( 30099) } }
164 };
165
166 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
167 simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a);
168 simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b);
169 simde__m128i r = simde_mm_add_epi16(a, b);
170 simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r));
171 }
172
173 return 0;
174 }
175
176 static int
test_simde_mm_add_epi32(SIMDE_MUNIT_TEST_ARGS)177 test_simde_mm_add_epi32 (SIMDE_MUNIT_TEST_ARGS) {
178 struct {
179 int32_t a[4];
180 int32_t b[4];
181 int32_t r[4];
182 } test_vec[] = {
183 { { INT32_C( 1587156417), INT32_C( 1768270179), -INT32_C( 1942404587), INT32_C( 346970517) },
184 { INT32_C( 2141391970), INT32_C( 1584534422), INT32_C( 1144809083), -INT32_C( 446909148) },
185 { -INT32_C( 566418909), -INT32_C( 942162695), -INT32_C( 797595504), -INT32_C( 99938631) } },
186 { { INT32_C( 776206027), -INT32_C( 1265129313), INT32_C( 2134954218), -INT32_C( 1953239511) },
187 { -INT32_C( 1861535750), -INT32_C( 974160566), INT32_C( 134884324), -INT32_C( 1393727775) },
188 { -INT32_C( 1085329723), INT32_C( 2055677417), -INT32_C( 2025128754), INT32_C( 948000010) } },
189 { { -INT32_C( 69586852), -INT32_C( 1011912232), INT32_C( 1782771777), -INT32_C( 638134562) },
190 { INT32_C( 1466564877), INT32_C( 1646090622), INT32_C( 1718232965), -INT32_C( 384673907) },
191 { INT32_C( 1396978025), INT32_C( 634178390), -INT32_C( 793962554), -INT32_C( 1022808469) } },
192 { { INT32_C( 1625615495), -INT32_C( 1641835683), INT32_C( 1644717443), INT32_C( 1211891259) },
193 { INT32_C( 2124457471), -INT32_C( 2082423298), INT32_C( 1911114724), INT32_C( 710605730) },
194 { -INT32_C( 544894330), INT32_C( 570708315), -INT32_C( 739135129), INT32_C( 1922496989) } },
195 { { INT32_C( 1149910759), INT32_C( 1440918993), INT32_C( 1320676114), -INT32_C( 375983383) },
196 { -INT32_C( 1788397929), -INT32_C( 686209037), INT32_C( 893911698), -INT32_C( 446717186) },
197 { -INT32_C( 638487170), INT32_C( 754709956), -INT32_C( 2080379484), -INT32_C( 822700569) } },
198 { { -INT32_C( 1305810464), -INT32_C( 1475933034), -INT32_C( 503922953), INT32_C( 1204456880) },
199 { -INT32_C( 1210306109), INT32_C( 193918328), -INT32_C( 163522568), INT32_C( 1524342649) },
200 { INT32_C( 1778850723), -INT32_C( 1282014706), -INT32_C( 667445521), -INT32_C( 1566167767) } },
201 { { INT32_C( 504104328), INT32_C( 163975954), -INT32_C( 2115322415), INT32_C( 231257162) },
202 { INT32_C( 1589945573), -INT32_C( 1838591078), -INT32_C( 1551324886), -INT32_C( 788700344) },
203 { INT32_C( 2094049901), -INT32_C( 1674615124), INT32_C( 628319995), -INT32_C( 557443182) } },
204 { { INT32_C( 2079197545), -INT32_C( 310070244), -INT32_C( 1150390415), INT32_C( 164181539) },
205 { INT32_C( 1969720795), INT32_C( 168284384), -INT32_C( 1045524615), INT32_C( 1536273394) },
206 { -INT32_C( 246048956), -INT32_C( 141785860), INT32_C( 2099052266), INT32_C( 1700454933) } }
207 };
208
209 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
210 simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a);
211 simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b);
212 simde__m128i r = simde_mm_add_epi32(a, b);
213 simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
214 }
215
216 return 0;
217 }
218
219 static int
test_simde_mm_add_epi64(SIMDE_MUNIT_TEST_ARGS)220 test_simde_mm_add_epi64 (SIMDE_MUNIT_TEST_ARGS) {
221 struct {
222 int64_t a[2];
223 int64_t b[2];
224 int64_t r[2];
225 } test_vec[] = {
226 { { -INT64_C( 6468439616558299793), INT64_C( 2325632228821341991) },
227 { -INT64_C( 612652056685655455), -INT64_C( 191691543793121214) },
228 { -INT64_C( 7081091673243955248), INT64_C( 2133940685028220777) } },
229 { { -INT64_C( 894566178211475330), INT64_C( 6756798005412736627) },
230 { -INT64_C( 3896691714656888127), INT64_C( 2845879868330258419) },
231 { -INT64_C( 4791257892868363457), -INT64_C( 8844066199966556570) } },
232 { { INT64_C( 7901755739001462504), INT64_C( 1347655258826955098) },
233 { INT64_C( 8953142355952099055), INT64_C( 248677757309780642) },
234 { -INT64_C( 1591845978755990057), INT64_C( 1596333016136735740) } },
235 { { -INT64_C( 8141839393087780454), -INT64_C( 2946030458831039558) },
236 { -INT64_C( 4972663281470790409), INT64_C( 1165720327465335311) },
237 { INT64_C( 5332241399150980753), -INT64_C( 1780310131365704247) } },
238 { { -INT64_C( 15861257455999742), INT64_C( 4357558393977351353) },
239 { INT64_C( 7214407425212598092), -INT64_C( 7045112387664469068) },
240 { INT64_C( 7198546167756598350), -INT64_C( 2687553993687117715) } },
241 { { INT64_C( 4532200698918854304), INT64_C( 7262715306804571977) },
242 { -INT64_C( 803639368974039520), -INT64_C( 4520672699422448119) },
243 { INT64_C( 3728561329944814784), INT64_C( 2742042607382123858) } },
244 { { -INT64_C( 73591731732932298), INT64_C( 6050399403914353275) },
245 { -INT64_C( 5903761005476331555), -INT64_C( 4762108524214604026) },
246 { -INT64_C( 5977352737209263853), INT64_C( 1288290879699749249) } },
247 { { -INT64_C( 7465715716457918288), INT64_C( 2653502295939739981) },
248 { INT64_C( 4698470722568297185), -INT64_C( 3402942170898265983) },
249 { -INT64_C( 2767244993889621103), -INT64_C( 749439874958526002) } }
250 };
251
252 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
253 simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a);
254 simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b);
255 simde__m128i r = simde_mm_add_epi64(a, b);
256 simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r));
257 }
258
259 return 0;
260 }
261
262 static int
test_simde_mm_add_pd(SIMDE_MUNIT_TEST_ARGS)263 test_simde_mm_add_pd (SIMDE_MUNIT_TEST_ARGS) {
264 struct {
265 simde_float64 a[2];
266 simde_float64 b[2];
267 simde_float64 r[2];
268 } test_vec[] = {
269 { { SIMDE_FLOAT64_C( 755.33), SIMDE_FLOAT64_C( 721.25) },
270 { SIMDE_FLOAT64_C( 781.60), SIMDE_FLOAT64_C( -779.68) },
271 { SIMDE_FLOAT64_C( 1536.93), SIMDE_FLOAT64_C( -58.44) } },
272 { { SIMDE_FLOAT64_C( -566.45), SIMDE_FLOAT64_C( -614.54) },
273 { SIMDE_FLOAT64_C( 194.36), SIMDE_FLOAT64_C( -334.34) },
274 { SIMDE_FLOAT64_C( -372.09), SIMDE_FLOAT64_C( -948.88) } },
275 { { SIMDE_FLOAT64_C( 813.61), SIMDE_FLOAT64_C( -315.29) },
276 { SIMDE_FLOAT64_C( 361.18), SIMDE_FLOAT64_C( 614.31) },
277 { SIMDE_FLOAT64_C( 1174.78), SIMDE_FLOAT64_C( 299.02) } },
278 { { SIMDE_FLOAT64_C( 824.96), SIMDE_FLOAT64_C( -193.54) },
279 { SIMDE_FLOAT64_C( 701.59), SIMDE_FLOAT64_C( -521.55) },
280 { SIMDE_FLOAT64_C( 1526.55), SIMDE_FLOAT64_C( -715.09) } },
281 { { SIMDE_FLOAT64_C( -703.59), SIMDE_FLOAT64_C( 322.49) },
282 { SIMDE_FLOAT64_C( -26.00), SIMDE_FLOAT64_C( 910.61) },
283 { SIMDE_FLOAT64_C( -729.59), SIMDE_FLOAT64_C( 1233.10) } },
284 { { SIMDE_FLOAT64_C( -720.23), SIMDE_FLOAT64_C( 197.82) },
285 { SIMDE_FLOAT64_C( -770.39), SIMDE_FLOAT64_C( -888.99) },
286 { SIMDE_FLOAT64_C( -1490.62), SIMDE_FLOAT64_C( -691.16) } },
287 { { SIMDE_FLOAT64_C( 238.41), SIMDE_FLOAT64_C( -248.68) },
288 { SIMDE_FLOAT64_C( -805.44), SIMDE_FLOAT64_C( 805.25) },
289 { SIMDE_FLOAT64_C( -567.03), SIMDE_FLOAT64_C( 556.57) } },
290 { { SIMDE_FLOAT64_C( 13.85), SIMDE_FLOAT64_C( -859.57) },
291 { SIMDE_FLOAT64_C( 840.09), SIMDE_FLOAT64_C( -230.82) },
292 { SIMDE_FLOAT64_C( 853.93), SIMDE_FLOAT64_C( -1090.39) } }
293 };
294
295 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
296 simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
297 simde__m128d b = simde_mm_loadu_pd(test_vec[i].b);
298 simde__m128d r = simde_mm_add_pd(a, b);
299 simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1);
300 }
301
302 return 0;
303 }
304
305 static int
test_simde_mm_add_sd(SIMDE_MUNIT_TEST_ARGS)306 test_simde_mm_add_sd(SIMDE_MUNIT_TEST_ARGS) {
307 const struct {
308 simde__m128d a;
309 simde__m128d b;
310 simde__m128d r;
311 } test_vec[8] = {
312 { simde_mm_set_pd(SIMDE_FLOAT64_C( -348.09), SIMDE_FLOAT64_C( -603.87)),
313 simde_mm_set_pd(SIMDE_FLOAT64_C( 42.81), SIMDE_FLOAT64_C( -955.64)),
314 simde_mm_set_pd(SIMDE_FLOAT64_C( -348.09), SIMDE_FLOAT64_C(-1559.51)) },
315 { simde_mm_set_pd(SIMDE_FLOAT64_C( 600.30), SIMDE_FLOAT64_C( 362.82)),
316 simde_mm_set_pd(SIMDE_FLOAT64_C( -245.13), SIMDE_FLOAT64_C( -144.52)),
317 simde_mm_set_pd(SIMDE_FLOAT64_C( 600.30), SIMDE_FLOAT64_C( 218.30)) },
318 { simde_mm_set_pd(SIMDE_FLOAT64_C( -909.82), SIMDE_FLOAT64_C( -28.51)),
319 simde_mm_set_pd(SIMDE_FLOAT64_C( -141.49), SIMDE_FLOAT64_C( 174.41)),
320 simde_mm_set_pd(SIMDE_FLOAT64_C( -909.82), SIMDE_FLOAT64_C( 145.90)) },
321 { simde_mm_set_pd(SIMDE_FLOAT64_C( -402.79), SIMDE_FLOAT64_C( -225.69)),
322 simde_mm_set_pd(SIMDE_FLOAT64_C( -114.28), SIMDE_FLOAT64_C( 118.74)),
323 simde_mm_set_pd(SIMDE_FLOAT64_C( -402.79), SIMDE_FLOAT64_C( -106.95)) },
324 { simde_mm_set_pd(SIMDE_FLOAT64_C( 476.58), SIMDE_FLOAT64_C( 189.13)),
325 simde_mm_set_pd(SIMDE_FLOAT64_C( 158.24), SIMDE_FLOAT64_C( 133.22)),
326 simde_mm_set_pd(SIMDE_FLOAT64_C( 476.58), SIMDE_FLOAT64_C( 322.35)) },
327 { simde_mm_set_pd(SIMDE_FLOAT64_C( -902.16), SIMDE_FLOAT64_C( -720.35)),
328 simde_mm_set_pd(SIMDE_FLOAT64_C( -496.01), SIMDE_FLOAT64_C( 563.52)),
329 simde_mm_set_pd(SIMDE_FLOAT64_C( -902.16), SIMDE_FLOAT64_C( -156.83)) },
330 { simde_mm_set_pd(SIMDE_FLOAT64_C( 32.48), SIMDE_FLOAT64_C( -172.74)),
331 simde_mm_set_pd(SIMDE_FLOAT64_C( 435.61), SIMDE_FLOAT64_C( 209.72)),
332 simde_mm_set_pd(SIMDE_FLOAT64_C( 32.48), SIMDE_FLOAT64_C( 36.98)) },
333 { simde_mm_set_pd(SIMDE_FLOAT64_C( 322.78), SIMDE_FLOAT64_C( -415.13)),
334 simde_mm_set_pd(SIMDE_FLOAT64_C( -49.82), SIMDE_FLOAT64_C( -195.58)),
335 simde_mm_set_pd(SIMDE_FLOAT64_C( 322.78), SIMDE_FLOAT64_C( -610.71)) }
336 };
337
338 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
339 simde__m128d r = simde_mm_add_sd(test_vec[i].a, test_vec[i].b);
340 simde_assert_m128d_close(r, test_vec[i].r, 1);
341 }
342
343 return 0;
344 }
345
346 static int
test_simde_mm_add_si64(SIMDE_MUNIT_TEST_ARGS)347 test_simde_mm_add_si64(SIMDE_MUNIT_TEST_ARGS) {
348 const struct {
349 simde__m64 a;
350 simde__m64 b;
351 simde__m64 r;
352 } test_vec[8] = {
353 { simde_mm_cvtsi64_m64(INT64_C( 793111073070173174)),
354 simde_mm_cvtsi64_m64(INT64_C( 2108668061446341817)),
355 simde_mm_cvtsi64_m64(INT64_C( 2901779134516514991)), },
356 { simde_mm_cvtsi64_m64(INT64_C( 8875506276833571865)),
357 simde_mm_cvtsi64_m64(INT64_C(-8625831155966083456)),
358 simde_mm_cvtsi64_m64(INT64_C( 249675120867488409)), },
359 { simde_mm_cvtsi64_m64(INT64_C( 2916092148231541839)),
360 simde_mm_cvtsi64_m64(INT64_C( 7640479428881902755)),
361 simde_mm_cvtsi64_m64(INT64_C(-7890172496596107022)), },
362 { simde_mm_cvtsi64_m64(INT64_C(-3448012693901819300)),
363 simde_mm_cvtsi64_m64(INT64_C(-9198379985559078668)),
364 simde_mm_cvtsi64_m64(INT64_C( 5800351394248653648)), },
365 { simde_mm_cvtsi64_m64(INT64_C( 3628113225825158935)),
366 simde_mm_cvtsi64_m64(INT64_C(-1333669735654572042)),
367 simde_mm_cvtsi64_m64(INT64_C( 2294443490170586893)), },
368 { simde_mm_cvtsi64_m64(INT64_C( 5048798289215441413)),
369 simde_mm_cvtsi64_m64(INT64_C( -388036903570542302)),
370 simde_mm_cvtsi64_m64(INT64_C( 4660761385644899111)), },
371 { simde_mm_cvtsi64_m64(INT64_C( 6446512717337269554)),
372 simde_mm_cvtsi64_m64(INT64_C(-7669829270527021775)),
373 simde_mm_cvtsi64_m64(INT64_C(-1223316553189752221)), },
374 { simde_mm_cvtsi64_m64(INT64_C( 6296531259101832881)),
375 simde_mm_cvtsi64_m64(INT64_C( 5834912758815977701)),
376 simde_mm_cvtsi64_m64(INT64_C(-6315300055791741034)), }
377 };
378
379 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
380 simde__m64 r = simde_mm_add_si64(test_vec[i].a, test_vec[i].b);
381 simde_assert_m64_i64(r, ==, test_vec[i].r);
382 }
383
384 return 0;
385 }
386
387 static int
test_simde_mm_adds_epi8(SIMDE_MUNIT_TEST_ARGS)388 test_simde_mm_adds_epi8(SIMDE_MUNIT_TEST_ARGS) {
389 const struct {
390 simde__m128i a;
391 simde__m128i b;
392 simde__m128i r;
393 } test_vec[8] = {
394 { simde_mm_set_epi8(INT8_C( 100), INT8_C( 33), INT8_C( 67), INT8_C( 67), INT8_C( 104), INT8_C(-123), INT8_C( -86), INT8_C( 74),
395 INT8_C( -93), INT8_C( -4), INT8_C( -12), INT8_C( 28), INT8_C( 9), INT8_C( 39), INT8_C( 83), INT8_C( -52)),
396 simde_mm_set_epi8(INT8_C( 40), INT8_C( -64), INT8_C( -19), INT8_C( -17), INT8_C( 67), INT8_C( -93), INT8_C( -22), INT8_C( 98),
397 INT8_C( -73), INT8_C( -83), INT8_C( 107), INT8_C( 95), INT8_C( 59), INT8_C( 84), INT8_C( -72), INT8_C(-115)),
398 simde_mm_set_epi8(INT8_C( 127), INT8_C( -31), INT8_C( 48), INT8_C( 50), INT8_C( 127), INT8_C(-128), INT8_C(-108), INT8_C( 127),
399 INT8_C(-128), INT8_C( -87), INT8_C( 95), INT8_C( 123), INT8_C( 68), INT8_C( 123), INT8_C( 11), INT8_C(-128)) },
400 { simde_mm_set_epi8(INT8_C( 76), INT8_C( 121), INT8_C( 98), INT8_C( 52), INT8_C( 50), INT8_C( -16), INT8_C( 53), INT8_C( 3),
401 INT8_C( -57), INT8_C( -76), INT8_C( -42), INT8_C( 70), INT8_C(-122), INT8_C( 71), INT8_C( -56), INT8_C( -15)),
402 simde_mm_set_epi8(INT8_C( 100), INT8_C( 124), INT8_C( 99), INT8_C( 11), INT8_C( -8), INT8_C( 5), INT8_C( 6), INT8_C( -54),
403 INT8_C( 42), INT8_C( -99), INT8_C( 23), INT8_C(-128), INT8_C( 77), INT8_C( 14), INT8_C( 94), INT8_C( 53)),
404 simde_mm_set_epi8(INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C( 63), INT8_C( 42), INT8_C( -11), INT8_C( 59), INT8_C( -51),
405 INT8_C( -15), INT8_C(-128), INT8_C( -19), INT8_C( -58), INT8_C( -45), INT8_C( 85), INT8_C( 38), INT8_C( 38)) },
406 { simde_mm_set_epi8(INT8_C( -13), INT8_C( 29), INT8_C( 30), INT8_C( 13), INT8_C( 51), INT8_C( 11), INT8_C( -27), INT8_C( -12),
407 INT8_C( 97), INT8_C( 87), INT8_C( 67), INT8_C( 70), INT8_C( 2), INT8_C( -40), INT8_C( 49), INT8_C( 116)),
408 simde_mm_set_epi8(INT8_C( 42), INT8_C( -17), INT8_C( -77), INT8_C( 126), INT8_C(-125), INT8_C( -42), INT8_C( 45), INT8_C( -79),
409 INT8_C( -23), INT8_C( 110), INT8_C( 117), INT8_C( -44), INT8_C( -92), INT8_C( -20), INT8_C(-121), INT8_C( 102)),
410 simde_mm_set_epi8(INT8_C( 29), INT8_C( 12), INT8_C( -47), INT8_C( 127), INT8_C( -74), INT8_C( -31), INT8_C( 18), INT8_C( -91),
411 INT8_C( 74), INT8_C( 127), INT8_C( 127), INT8_C( 26), INT8_C( -90), INT8_C( -60), INT8_C( -72), INT8_C( 127)) },
412 { simde_mm_set_epi8(INT8_C( 55), INT8_C(-106), INT8_C( -49), INT8_C( -49), INT8_C( -85), INT8_C( -58), INT8_C( -56), INT8_C( -25),
413 INT8_C( 78), INT8_C( 18), INT8_C( 71), INT8_C( -12), INT8_C( 86), INT8_C( -84), INT8_C( -77), INT8_C(-116)),
414 simde_mm_set_epi8(INT8_C(-103), INT8_C( 107), INT8_C( 33), INT8_C( -17), INT8_C( 106), INT8_C( 4), INT8_C( -98), INT8_C(-128),
415 INT8_C( 53), INT8_C( 4), INT8_C( 120), INT8_C( -44), INT8_C( -99), INT8_C( 120), INT8_C( -27), INT8_C( 45)),
416 simde_mm_set_epi8(INT8_C( -48), INT8_C( 1), INT8_C( -16), INT8_C( -66), INT8_C( 21), INT8_C( -54), INT8_C(-128), INT8_C(-128),
417 INT8_C( 127), INT8_C( 22), INT8_C( 127), INT8_C( -56), INT8_C( -13), INT8_C( 36), INT8_C(-104), INT8_C( -71)) },
418 { simde_mm_set_epi8(INT8_C( 47), INT8_C( 15), INT8_C( 126), INT8_C(-115), INT8_C( -77), INT8_C( -27), INT8_C( -38), INT8_C( 32),
419 INT8_C( -21), INT8_C( -80), INT8_C( 112), INT8_C( 75), INT8_C( -15), INT8_C( -92), INT8_C( 43), INT8_C( -22)),
420 simde_mm_set_epi8(INT8_C( -33), INT8_C( 127), INT8_C( 123), INT8_C( 65), INT8_C( 63), INT8_C( 85), INT8_C( 75), INT8_C( 99),
421 INT8_C( -2), INT8_C( 13), INT8_C( -46), INT8_C( -8), INT8_C( 127), INT8_C(-115), INT8_C(-109), INT8_C( 14)),
422 simde_mm_set_epi8(INT8_C( 14), INT8_C( 127), INT8_C( 127), INT8_C( -50), INT8_C( -14), INT8_C( 58), INT8_C( 37), INT8_C( 127),
423 INT8_C( -23), INT8_C( -67), INT8_C( 66), INT8_C( 67), INT8_C( 112), INT8_C(-128), INT8_C( -66), INT8_C( -8)) },
424 { simde_mm_set_epi8(INT8_C( 18), INT8_C( 75), INT8_C( 10), INT8_C( 29), INT8_C( 27), INT8_C( 101), INT8_C( -1), INT8_C( 78),
425 INT8_C( -78), INT8_C( 110), INT8_C( 18), INT8_C( 82), INT8_C( -41), INT8_C( 85), INT8_C(-113), INT8_C( 126)),
426 simde_mm_set_epi8(INT8_C( -90), INT8_C( 80), INT8_C(-103), INT8_C(-111), INT8_C( 86), INT8_C( 65), INT8_C( 89), INT8_C( 88),
427 INT8_C( -83), INT8_C(-121), INT8_C( -2), INT8_C( 40), INT8_C( -96), INT8_C( -36), INT8_C( 64), INT8_C( -15)),
428 simde_mm_set_epi8(INT8_C( -72), INT8_C( 127), INT8_C( -93), INT8_C( -82), INT8_C( 113), INT8_C( 127), INT8_C( 88), INT8_C( 127),
429 INT8_C(-128), INT8_C( -11), INT8_C( 16), INT8_C( 122), INT8_C(-128), INT8_C( 49), INT8_C( -49), INT8_C( 111)) },
430 { simde_mm_set_epi8(INT8_C( -90), INT8_C( 48), INT8_C( -43), INT8_C( 22), INT8_C( 78), INT8_C( -17), INT8_C( -78), INT8_C( -64),
431 INT8_C( -97), INT8_C( -80), INT8_C( -51), INT8_C( 72), INT8_C( 114), INT8_C( -11), INT8_C( -89), INT8_C( -93)),
432 simde_mm_set_epi8(INT8_C( 8), INT8_C( 57), INT8_C( 66), INT8_C(-119), INT8_C( 79), INT8_C( -29), INT8_C( -49), INT8_C( 26),
433 INT8_C( -12), INT8_C( -99), INT8_C(-101), INT8_C( 121), INT8_C(-112), INT8_C( -5), INT8_C( -19), INT8_C( -27)),
434 simde_mm_set_epi8(INT8_C( -82), INT8_C( 105), INT8_C( 23), INT8_C( -97), INT8_C( 127), INT8_C( -46), INT8_C(-127), INT8_C( -38),
435 INT8_C(-109), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 2), INT8_C( -16), INT8_C(-108), INT8_C(-120)) },
436 { simde_mm_set_epi8(INT8_C( 26), INT8_C( -15), INT8_C( 12), INT8_C( -66), INT8_C( -7), INT8_C(-115), INT8_C( -21), INT8_C( 27),
437 INT8_C( 111), INT8_C(-126), INT8_C( -43), INT8_C( -94), INT8_C( -97), INT8_C( -34), INT8_C( -47), INT8_C( -79)),
438 simde_mm_set_epi8(INT8_C(-124), INT8_C( -47), INT8_C(-123), INT8_C(-115), INT8_C( -15), INT8_C( -87), INT8_C(-121), INT8_C( -50),
439 INT8_C( 103), INT8_C( 85), INT8_C( 34), INT8_C( -85), INT8_C(-124), INT8_C( 70), INT8_C( 14), INT8_C( -44)),
440 simde_mm_set_epi8(INT8_C( -98), INT8_C( -62), INT8_C(-111), INT8_C(-128), INT8_C( -22), INT8_C(-128), INT8_C(-128), INT8_C( -23),
441 INT8_C( 127), INT8_C( -41), INT8_C( -9), INT8_C(-128), INT8_C(-128), INT8_C( 36), INT8_C( -33), INT8_C(-123)) }
442 };
443
444 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
445 simde__m128i r = simde_mm_adds_epi8(test_vec[i].a, test_vec[i].b);
446 simde_assert_m128i_i8(r, ==, test_vec[i].r);
447 }
448
449 return 0;
450 }
451
452 static int
test_simde_mm_adds_epi16(SIMDE_MUNIT_TEST_ARGS)453 test_simde_mm_adds_epi16(SIMDE_MUNIT_TEST_ARGS) {
454 const struct {
455 simde__m128i a;
456 simde__m128i b;
457 simde__m128i r;
458 } test_vec[8] = {
459 { simde_mm_set_epi16(INT16_C( -9187), INT16_C( 11150), INT16_C(-25711), INT16_C( 30917),
460 INT16_C( 5637), INT16_C( 27391), INT16_C( 20667), INT16_C(-25552)),
461 simde_mm_set_epi16(INT16_C( 15244), INT16_C(-26000), INT16_C(-24422), INT16_C(-28473),
462 INT16_C( -7393), INT16_C( 32134), INT16_C( -161), INT16_C( -2948)),
463 simde_mm_set_epi16(INT16_C( 6057), INT16_C(-14850), INT16_C(-32768), INT16_C( 2444),
464 INT16_C( -1756), INT16_C( 32767), INT16_C( 20506), INT16_C(-28500)) },
465 { simde_mm_set_epi16(INT16_C(-27976), INT16_C( 8581), INT16_C( 17714), INT16_C(-15964),
466 INT16_C(-24791), INT16_C( 29014), INT16_C( -8950), INT16_C(-19859)),
467 simde_mm_set_epi16(INT16_C(-20491), INT16_C(-23795), INT16_C( 5770), INT16_C(-28365),
468 INT16_C( -4266), INT16_C(-14588), INT16_C( 21498), INT16_C( 13063)),
469 simde_mm_set_epi16(INT16_C(-32768), INT16_C(-15214), INT16_C( 23484), INT16_C(-32768),
470 INT16_C(-29057), INT16_C( 14426), INT16_C( 12548), INT16_C( -6796)) },
471 { simde_mm_set_epi16(INT16_C(-24285), INT16_C(-16974), INT16_C( 21513), INT16_C( 30869),
472 INT16_C(-30698), INT16_C( 2555), INT16_C(-20742), INT16_C(-26329)),
473 simde_mm_set_epi16(INT16_C( 19660), INT16_C(-27596), INT16_C( 16650), INT16_C( 30694),
474 INT16_C( 14408), INT16_C( 7632), INT16_C( 15232), INT16_C( -7024)),
475 simde_mm_set_epi16(INT16_C( -4625), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767),
476 INT16_C(-16290), INT16_C( 10187), INT16_C( -5510), INT16_C(-32768)) },
477 { simde_mm_set_epi16(INT16_C( -6143), INT16_C( 5190), INT16_C( -240), INT16_C(-14301),
478 INT16_C( 12856), INT16_C( 32740), INT16_C(-13308), INT16_C( 31639)),
479 simde_mm_set_epi16(INT16_C( 21047), INT16_C(-20544), INT16_C(-28076), INT16_C(-30442),
480 INT16_C( 28180), INT16_C(-18015), INT16_C( 12870), INT16_C( 12342)),
481 simde_mm_set_epi16(INT16_C( 14904), INT16_C(-15354), INT16_C(-28316), INT16_C(-32768),
482 INT16_C( 32767), INT16_C( 14725), INT16_C( -438), INT16_C( 32767)) },
483 { simde_mm_set_epi16(INT16_C( 21004), INT16_C( 26590), INT16_C( -387), INT16_C( 5458),
484 INT16_C( 28558), INT16_C( -1691), INT16_C( 13843), INT16_C( -2265)),
485 simde_mm_set_epi16(INT16_C( 24548), INT16_C(-19288), INT16_C( 1056), INT16_C( 5037),
486 INT16_C( 9790), INT16_C( 12391), INT16_C( -2983), INT16_C( 8158)),
487 simde_mm_set_epi16(INT16_C( 32767), INT16_C( 7302), INT16_C( 669), INT16_C( 10495),
488 INT16_C( 32767), INT16_C( 10700), INT16_C( 10860), INT16_C( 5893)) },
489 { simde_mm_set_epi16(INT16_C( 23035), INT16_C( 14493), INT16_C( 11060), INT16_C(-15265),
490 INT16_C(-25751), INT16_C(-17380), INT16_C(-20209), INT16_C(-22539)),
491 simde_mm_set_epi16(INT16_C(-10338), INT16_C( 26220), INT16_C( -6324), INT16_C( 16083),
492 INT16_C(-20758), INT16_C( 28594), INT16_C(-27719), INT16_C(-21423)),
493 simde_mm_set_epi16(INT16_C( 12697), INT16_C( 32767), INT16_C( 4736), INT16_C( 818),
494 INT16_C(-32768), INT16_C( 11214), INT16_C(-32768), INT16_C(-32768)) },
495 { simde_mm_set_epi16(INT16_C( 1437), INT16_C( -1148), INT16_C( -7704), INT16_C( -3845),
496 INT16_C( 5523), INT16_C( 32157), INT16_C( -3057), INT16_C( -2194)),
497 simde_mm_set_epi16(INT16_C( 20255), INT16_C( 16313), INT16_C( 26265), INT16_C( -5377),
498 INT16_C( 31904), INT16_C( 3795), INT16_C( 20716), INT16_C(-30035)),
499 simde_mm_set_epi16(INT16_C( 21692), INT16_C( 15165), INT16_C( 18561), INT16_C( -9222),
500 INT16_C( 32767), INT16_C( 32767), INT16_C( 17659), INT16_C(-32229)) },
501 { simde_mm_set_epi16(INT16_C( 856), INT16_C( 13772), INT16_C(-17603), INT16_C(-26424),
502 INT16_C( 9957), INT16_C(-11801), INT16_C( 3067), INT16_C(-26950)),
503 simde_mm_set_epi16(INT16_C(-26495), INT16_C(-22337), INT16_C(-30714), INT16_C( 24988),
504 INT16_C(-24287), INT16_C( 11170), INT16_C(-20015), INT16_C( 26834)),
505 simde_mm_set_epi16(INT16_C(-25639), INT16_C( -8565), INT16_C(-32768), INT16_C( -1436),
506 INT16_C(-14330), INT16_C( -631), INT16_C(-16948), INT16_C( -116)) }
507 };
508
509 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
510 simde__m128i r = simde_mm_adds_epi16(test_vec[i].a, test_vec[i].b);
511 simde_assert_m128i_i16(r, ==, test_vec[i].r);
512 }
513
514 return 0;
515 }
516
517 static int
test_simde_mm_adds_epu8(SIMDE_MUNIT_TEST_ARGS)518 test_simde_mm_adds_epu8(SIMDE_MUNIT_TEST_ARGS) {
519 const struct {
520 simde__m128i a;
521 simde__m128i b;
522 simde__m128i r;
523 } test_vec[8] = {
524 { simde_x_mm_set_epu8(243, 185, 175, 84, 114, 173, 219, 130,
525 80, 12, 6, 121, 58, 223, 94, 203),
526 simde_x_mm_set_epu8( 46, 142, 32, 64, 239, 92, 213, 158,
527 92, 20, 62, 216, 2, 162, 3, 226),
528 simde_x_mm_set_epu8(255, 255, 207, 148, 255, 255, 255, 255,
529 172, 32, 68, 255, 60, 255, 97, 255) },
530 { simde_x_mm_set_epu8(200, 115, 63, 101, 233, 139, 164, 230,
531 4, 147, 7, 233, 110, 206, 178, 233),
532 simde_x_mm_set_epu8( 87, 74, 19, 102, 136, 119, 164, 198,
533 113, 170, 154, 7, 191, 195, 220, 182),
534 simde_x_mm_set_epu8(255, 189, 82, 203, 255, 255, 255, 255,
535 117, 255, 161, 240, 255, 255, 255, 255) },
536 { simde_x_mm_set_epu8( 35, 38, 142, 165, 104, 97, 151, 1,
537 79, 16, 160, 140, 19, 109, 210, 120),
538 simde_x_mm_set_epu8( 2, 202, 138, 112, 199, 233, 201, 65,
539 233, 49, 101, 216, 62, 35, 235, 214),
540 simde_x_mm_set_epu8( 37, 240, 255, 255, 255, 255, 255, 66,
541 255, 65, 255, 255, 81, 144, 255, 255) },
542 { simde_x_mm_set_epu8( 98, 74, 253, 101, 187, 74, 205, 52,
543 154, 226, 198, 148, 241, 174, 125, 62),
544 simde_x_mm_set_epu8(163, 110, 1, 166, 233, 185, 220, 101,
545 190, 92, 121, 253, 238, 73, 61, 34),
546 simde_x_mm_set_epu8(255, 184, 254, 255, 255, 255, 255, 153,
547 255, 255, 255, 255, 255, 247, 186, 96) },
548 { simde_x_mm_set_epu8( 91, 28, 52, 18, 175, 61, 49, 67,
549 76, 39, 238, 247, 137, 91, 133, 4),
550 simde_x_mm_set_epu8(142, 255, 123, 14, 70, 48, 62, 186,
551 134, 31, 154, 34, 3, 30, 40, 184),
552 simde_x_mm_set_epu8(233, 255, 175, 32, 245, 109, 111, 253,
553 210, 70, 255, 255, 140, 121, 173, 188) },
554 { simde_x_mm_set_epu8( 32, 230, 94, 17, 123, 186, 43, 67,
555 13, 45, 219, 214, 133, 19, 25, 150),
556 simde_x_mm_set_epu8(114, 27, 244, 244, 84, 0, 108, 198,
557 239, 228, 225, 158, 4, 27, 84, 116),
558 simde_x_mm_set_epu8(146, 255, 255, 255, 207, 186, 151, 255,
559 252, 255, 255, 255, 137, 46, 109, 255) },
560 { simde_x_mm_set_epu8( 66, 152, 8, 32, 7, 222, 46, 10,
561 116, 185, 69, 186, 194, 134, 55, 214),
562 simde_x_mm_set_epu8(185, 11, 114, 201, 179, 122, 77, 244,
563 221, 175, 219, 12, 207, 104, 91, 252),
564 simde_x_mm_set_epu8(251, 163, 122, 233, 186, 255, 123, 254,
565 255, 255, 255, 198, 255, 238, 146, 255) },
566 { simde_x_mm_set_epu8(149, 71, 22, 119, 62, 37, 103, 26,
567 193, 60, 234, 165, 97, 233, 187, 76),
568 simde_x_mm_set_epu8(169, 9, 188, 18, 251, 187, 96, 167,
569 158, 238, 176, 160, 74, 18, 253, 103),
570 simde_x_mm_set_epu8(255, 80, 210, 137, 255, 224, 199, 193,
571 255, 255, 255, 255, 171, 251, 255, 179) }
572 };
573
574 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
575 simde__m128i r = simde_mm_adds_epu8(test_vec[i].a, test_vec[i].b);
576 simde_assert_m128i_u8(r, ==, test_vec[i].r);
577 }
578
579 return 0;
580 }
581
582 static int
test_simde_mm_adds_epu16(SIMDE_MUNIT_TEST_ARGS)583 test_simde_mm_adds_epu16(SIMDE_MUNIT_TEST_ARGS) {
584 const struct {
585 simde__m128i a;
586 simde__m128i b;
587 simde__m128i r;
588 } test_vec[8] = {
589 { simde_x_mm_set_epu16(52397, 2628, 46614, 35162, 54536, 36456, 24004, 11160),
590 simde_x_mm_set_epu16(41921, 12035, 29903, 58497, 1695, 15558, 14248, 61659),
591 simde_x_mm_set_epu16(65535, 14663, 65535, 65535, 56231, 52014, 38252, 65535) },
592 { simde_x_mm_set_epu16(57345, 6650, 8556, 25986, 61163, 19076, 40550, 40920),
593 simde_x_mm_set_epu16(62607, 15369, 35325, 28241, 54252, 5722, 23748, 36984),
594 simde_x_mm_set_epu16(65535, 22019, 43881, 54227, 65535, 24798, 64298, 65535) },
595 { simde_x_mm_set_epu16(19370, 64323, 5781, 65431, 30915, 24348, 65190, 30074),
596 simde_x_mm_set_epu16(34245, 57703, 60540, 40683, 24154, 18750, 32124, 33828),
597 simde_x_mm_set_epu16(53615, 65535, 65535, 65535, 55069, 43098, 65535, 63902) },
598 { simde_x_mm_set_epu16( 1083, 62410, 53296, 45, 57969, 54778, 42038, 36216),
599 simde_x_mm_set_epu16(47446, 36131, 44258, 13796, 53696, 55457, 27279, 19924),
600 simde_x_mm_set_epu16(48529, 65535, 65535, 13841, 65535, 65535, 65535, 56140) },
601 { simde_x_mm_set_epu16(53022, 40173, 23284, 53830, 27939, 30100, 61471, 602),
602 simde_x_mm_set_epu16(42952, 36449, 22644, 6670, 537, 5689, 73, 2247),
603 simde_x_mm_set_epu16(65535, 65535, 45928, 60500, 28476, 35789, 61544, 2849) },
604 { simde_x_mm_set_epu16( 8441, 24815, 22801, 35056, 30653, 5655, 39135, 32848),
605 simde_x_mm_set_epu16( 7115, 32196, 31449, 51212, 54481, 9348, 63499, 54202),
606 simde_x_mm_set_epu16(15556, 57011, 54250, 65535, 65535, 15003, 65535, 65535) },
607 { simde_x_mm_set_epu16( 5059, 20924, 5143, 29698, 39512, 42596, 50907, 48157),
608 simde_x_mm_set_epu16(55259, 30633, 10948, 60956, 47288, 59136, 49334, 11432),
609 simde_x_mm_set_epu16(60318, 51557, 16091, 65535, 65535, 65535, 65535, 59589) },
610 { simde_x_mm_set_epu16(53397, 1584, 56368, 64962, 35166, 11367, 24855, 22370),
611 simde_x_mm_set_epu16( 5862, 9719, 15493, 14762, 25151, 48370, 30737, 29969),
612 simde_x_mm_set_epu16(59259, 11303, 65535, 65535, 60317, 59737, 55592, 52339) }
613 };
614
615 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
616 simde__m128i r = simde_mm_adds_epu16(test_vec[i].a, test_vec[i].b);
617 simde_assert_m128i_u16(r, ==, test_vec[i].r);
618 }
619
620 return 0;
621 }
622
623 static int
test_simde_mm_and_pd(SIMDE_MUNIT_TEST_ARGS)624 test_simde_mm_and_pd(SIMDE_MUNIT_TEST_ARGS) {
625 simde__m128d
626 all_set = simde_x_mm_setone_pd(),
627 all_unset = simde_mm_setzero_pd();
628
629 simde_assert_m128d_equal(simde_mm_and_pd(all_set, all_unset), all_unset);
630 simde_assert_m128d_equal(simde_mm_and_pd(all_set, all_set), all_set);
631 simde_assert_m128d_equal(simde_mm_and_pd(all_unset, all_unset), all_unset);
632
633 return 0;
634 }
635
636 static int
test_simde_mm_and_si128(SIMDE_MUNIT_TEST_ARGS)637 test_simde_mm_and_si128(SIMDE_MUNIT_TEST_ARGS) {
638 const struct {
639 simde__m128i a;
640 simde__m128i b;
641 simde__m128i r;
642 } test_vec[8] = {
643 { simde_mm_set_epi32(INT32_C( 1143386005), INT32_C( 255040004), INT32_C( 778825143), INT32_C( 1160880262)),
644 simde_mm_set_epi32(INT32_C(-1598010564), INT32_C( 882577136), INT32_C( 1895747884), INT32_C( 78458499)),
645 simde_mm_set_epi32(INT32_C( 2836), INT32_C( 68356608), INT32_C( 543867172), INT32_C( 69279874)) },
646 { simde_mm_set_epi32(INT32_C( 929630839), INT32_C( 1332223012), INT32_C( -595247247), INT32_C( 1607004091)),
647 simde_mm_set_epi32(INT32_C( -96984995), INT32_C( -496201158), INT32_C( 1667897198), INT32_C( 329068048)),
648 simde_mm_set_epi32(INT32_C( 841482325), INT32_C( 1114116128), INT32_C( 1073747808), INT32_C( 327691792)) },
649 { simde_mm_set_epi32(INT32_C( 1507410371), INT32_C(-1202228125), INT32_C( 213174798), INT32_C( 1712466479)),
650 simde_mm_set_epi32(INT32_C( -416935364), INT32_C( 76821686), INT32_C( -895281725), INT32_C( 640856929)),
651 simde_mm_set_epi32(INT32_C( 1090519040), INT32_C( 1324066), INT32_C( 144703490), INT32_C( 638722593)) },
652 { simde_mm_set_epi32(INT32_C(-1967400648), INT32_C( -398277023), INT32_C( 1276094966), INT32_C(-1580835262)),
653 simde_mm_set_epi32(INT32_C( 185492863), INT32_C( 1265367516), INT32_C( -384438464), INT32_C( 1008626379)),
654 simde_mm_set_epi32(INT32_C( 168444216), INT32_C( 1212334144), INT32_C( 1208330560), INT32_C( 537283138)) },
655 { simde_mm_set_epi32(INT32_C( 1287640091), INT32_C( -654000828), INT32_C( 597524546), INT32_C( 182360913)),
656 simde_mm_set_epi32(INT32_C( 1236330411), INT32_C( 1010510657), INT32_C(-1874705697), INT32_C( -544222805)),
657 simde_mm_set_epi32(INT32_C( 1219544075), INT32_C( 402663744), INT32_C( 66), INT32_C( 177115393)) },
658 { simde_mm_set_epi32(INT32_C( -783740762), INT32_C( 1592969400), INT32_C(-1896275639), INT32_C( 1398555518)),
659 simde_mm_set_epi32(INT32_C( 618146080), INT32_C( -972493969), INT32_C( -440292799), INT32_C( 888342397)),
660 simde_mm_set_epi32(INT32_C( 4718624), INT32_C( 1174456360), INT32_C(-2067718079), INT32_C( 273679228)) },
661 { simde_mm_set_epi32(INT32_C( 975551520), INT32_C( 223749592), INT32_C(-1022254731), INT32_C( -845311996)),
662 simde_mm_set_epi32(INT32_C( 1522650069), INT32_C( 1767255815), INT32_C( 1217271913), INT32_C(-1365644996)),
663 simde_mm_set_epi32(INT32_C( 436307968), INT32_C( 156640512), INT32_C( 1073741921), INT32_C(-1936097276)) },
664 { simde_mm_set_epi32(INT32_C(-1607852092), INT32_C( -146112938), INT32_C( 112326370), INT32_C( 971940993)),
665 simde_mm_set_epi32(INT32_C( 1129446249), INT32_C( -367605030), INT32_C( 2031327443), INT32_C( -763011289)),
666 simde_mm_set_epi32(INT32_C( 6976), INT32_C( -503166382), INT32_C( 1151170), INT32_C( 277087233)) }
667 };
668
669 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
670 simde__m128i r = simde_mm_and_si128(test_vec[i].a, test_vec[i].b);
671 simde_assert_m128i_i8(r, ==, test_vec[i].r);
672 }
673
674 return 0;
675 }
676
677 static int
test_simde_mm_andnot_pd(SIMDE_MUNIT_TEST_ARGS)678 test_simde_mm_andnot_pd (SIMDE_MUNIT_TEST_ARGS) {
679 static const struct {
680 const int64_t a[2];
681 const int64_t b[2];
682 const int64_t r[2];
683 } test_vec[] = {
684 { { -INT64_C( 2301068032488183266), INT64_C( 2211418302004999322) },
685 { -INT64_C( 6496218963460796338), -INT64_C( 1823812953617724359) },
686 { INT64_C( 416592882749870144), -INT64_C( 2305839985119459295) } },
687 { { -INT64_C( 4026537826130773906), -INT64_C( 4293586900818793454) },
688 { INT64_C( 3627658264586431853), INT64_C( 2793073689318142995) },
689 { INT64_C( 3620897194948822273), INT64_C( 2486233871494942721) } },
690 { { INT64_C( 9163842748139474741), INT64_C( 2569644122047224175) },
691 { -INT64_C( 969348282954885022), INT64_C( 811088657167341923) },
692 { -INT64_C( 9187294233813168062), INT64_C( 594616163615653888) } },
693 { { -INT64_C( 7634837049602759393), INT64_C( 7858258033422095925) },
694 { INT64_C( 8560872385946379772), INT64_C( 5408772763975523373) },
695 { INT64_C( 6972768561940938976), INT64_C( 144627715121612808) } },
696 { { INT64_C( 5707717806252392055), -INT64_C( 3300212919446621766) },
697 { INT64_C( 9059126230790306606), -INT64_C( 7813869476910184169) },
698 { INT64_C( 3497071796361199880), INT64_C( 111611459737241605) } },
699 { { INT64_C( 322122041068250894), INT64_C( 3008683809568371225) },
700 { INT64_C( 127266174305791736), -INT64_C( 2153613612017236628) },
701 { INT64_C( 109216282184321264), -INT64_C( 4459687314800229020) } },
702 { { -INT64_C( 3492115216109711814), INT64_C( 5213618401531810613) },
703 { INT64_C( 6185505972225623532), INT64_C( 2977154160444382105) },
704 { INT64_C( 1177221022203544004), INT64_C( 2378043193183766664) } },
705 { { INT64_C( 1499198256367688520), -INT64_C( 638210378185732981) },
706 { -INT64_C( 6727634533471112091), -INT64_C( 4478722654180704065) },
707 { -INT64_C( 6764261500459572187), INT64_C( 60904633988546612) } }
708 };
709
710 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
711 simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a);
712 simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b);
713 simde__m128i r = simde_mm_castpd_si128(simde_mm_andnot_pd(simde_mm_castsi128_pd(a), simde_mm_castsi128_pd(b)));
714 simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r));
715 }
716
717 return 0;
718 }
719
720 static int
test_simde_mm_andnot_si128(SIMDE_MUNIT_TEST_ARGS)721 test_simde_mm_andnot_si128(SIMDE_MUNIT_TEST_ARGS) {
722 const struct {
723 simde__m128i a;
724 simde__m128i b;
725 simde__m128i r;
726 } test_vec[8] = {
727 { simde_mm_set_epi32(INT32_C( -560808079), INT32_C( -229809400), INT32_C(-1262424809), INT32_C( 39478984)),
728 simde_mm_set_epi32(INT32_C(-2116907800), INT32_C( -478633369), INT32_C( -758500702), INT32_C( 774827765)),
729 simde_mm_set_epi32(INT32_C( 20971656), INT32_C( 19955815), INT32_C( 1107956384), INT32_C( 740591669)) },
730 { simde_mm_set_epi32(INT32_C( -390101732), INT32_C( -878593643), INT32_C( -87858932), INT32_C( 1872700566)),
731 simde_mm_set_epi32(INT32_C( 794830631), INT32_C( 1201718915), INT32_C( 1477008088), INT32_C( -178127418)),
732 simde_mm_set_epi32(INT32_C( 121643555), INT32_C( 67126786), INT32_C( 531152), INT32_C(-1872701120)) },
733 { simde_mm_set_epi32(INT32_C( -969785513), INT32_C( 743154241), INT32_C( -944974936), INT32_C(-1136592248)),
734 simde_mm_set_epi32(INT32_C( -909998602), INT32_C( 431643866), INT32_C( -708589890), INT32_C( -556429363)),
735 simde_mm_set_epi32(INT32_C( 163610784), INT32_C( 296770714), INT32_C( 272827414), INT32_C( 1117062469)) },
736 { simde_mm_set_epi32(INT32_C( 1619650408), INT32_C( 861525694), INT32_C(-2058207417), INT32_C( 228720218)),
737 simde_mm_set_epi32(INT32_C( 1416821078), INT32_C( 2107001565), INT32_C(-1248448269), INT32_C(-1204471361)),
738 simde_mm_set_epi32(INT32_C( 343021590), INT32_C( 1283852353), INT32_C( 813957296), INT32_C(-1340866139)) },
739 { simde_mm_set_epi32(INT32_C( -343490394), INT32_C( 1846187115), INT32_C( -847771260), INT32_C( 97935165)),
740 simde_mm_set_epi32(INT32_C( -69489865), INT32_C(-1109591795), INT32_C( 169478308), INT32_C( 1662522631)),
741 simde_mm_set_epi32(INT32_C( 274279185), INT32_C(-1848350460), INT32_C( 33685536), INT32_C( 1644691458)) },
742 { simde_mm_set_epi32(INT32_C( 608096731), INT32_C( -775399847), INT32_C( -52780990), INT32_C( 459462722)),
743 simde_mm_set_epi32(INT32_C(-1928888486), INT32_C(-1926941714), INT32_C(-1218438233), INT32_C( 195273416)),
744 simde_mm_set_epi32(INT32_C(-1996411392), INT32_C( 203760038), INT32_C( 52435877), INT32_C( 8462984)) },
745 { simde_mm_set_epi32(INT32_C( 1829801526), INT32_C( 1678890728), INT32_C(-1629742565), INT32_C( 902941266)),
746 simde_mm_set_epi32(INT32_C( 110066513), INT32_C( -591553870), INT32_C( -950259417), INT32_C( 810403185)),
747 simde_mm_set_epi32(INT32_C( 42955073), INT32_C(-1733556206), INT32_C( 1090527524), INT32_C( 787745)) },
748 { simde_mm_set_epi32(INT32_C( 321441431), INT32_C(-1200267660), INT32_C( -313751420), INT32_C( 515761953)),
749 simde_mm_set_epi32(INT32_C( -687838781), INT32_C( 1420638186), INT32_C(-1442242179), INT32_C( 1996838037)),
750 simde_mm_set_epi32(INT32_C(-1006624448), INT32_C( 1149772170), INT32_C( 33628537), INT32_C( 1627394196)) }
751 };
752
753 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
754 simde__m128i r = simde_mm_andnot_si128(test_vec[i].a, test_vec[i].b);
755 simde_assert_m128i_i8(r, ==, test_vec[i].r);
756 }
757
758 return 0;
759 }
760
761 static int
test_simde_mm_avg_epu8(SIMDE_MUNIT_TEST_ARGS)762 test_simde_mm_avg_epu8(SIMDE_MUNIT_TEST_ARGS) {
763 const struct {
764 simde__m128i a;
765 simde__m128i b;
766 simde__m128i r;
767 } test_vec[8] = {
768 { simde_x_mm_set_epu8( 22, 218, 216, 66, 82, 156, 47, 52,
769 255, 1, 136, 174, 147, 136, 106, 178),
770 simde_x_mm_set_epu8( 66, 241, 223, 129, 96, 67, 0, 68,
771 242, 71, 233, 224, 227, 252, 191, 92),
772 simde_x_mm_set_epu8( 44, 230, 220, 98, 89, 112, 24, 60,
773 249, 36, 185, 199, 187, 194, 149, 135) },
774 { simde_x_mm_set_epu8(219, 214, 26, 72, 63, 56, 200, 118,
775 196, 107, 88, 110, 187, 3, 64, 214),
776 simde_x_mm_set_epu8( 44, 175, 103, 82, 87, 192, 180, 37,
777 0, 200, 53, 214, 25, 17, 19, 149),
778 simde_x_mm_set_epu8(132, 195, 65, 77, 75, 124, 190, 78,
779 98, 154, 71, 162, 106, 10, 42, 182) },
780 { simde_x_mm_set_epu8(221, 9, 162, 208, 84, 84, 50, 140,
781 230, 69, 178, 12, 34, 173, 44, 58),
782 simde_x_mm_set_epu8( 4, 110, 65, 218, 252, 108, 241, 136,
783 36, 109, 68, 2, 121, 10, 120, 101),
784 simde_x_mm_set_epu8(113, 60, 114, 213, 168, 96, 146, 138,
785 133, 89, 123, 7, 78, 92, 82, 80) },
786 { simde_x_mm_set_epu8(173, 38, 26, 251, 66, 136, 168, 132,
787 170, 244, 145, 27, 76, 168, 97, 129),
788 simde_x_mm_set_epu8(211, 66, 29, 93, 231, 30, 149, 218,
789 72, 12, 231, 238, 124, 3, 127, 55),
790 simde_x_mm_set_epu8(192, 52, 28, 172, 149, 83, 159, 175,
791 121, 128, 188, 133, 100, 86, 112, 92) },
792 { simde_x_mm_set_epu8( 33, 120, 41, 4, 226, 71, 169, 72,
793 92, 211, 80, 53, 22, 250, 136, 31),
794 simde_x_mm_set_epu8(163, 237, 214, 178, 29, 194, 137, 109,
795 134, 197, 40, 228, 174, 101, 114, 162),
796 simde_x_mm_set_epu8( 98, 179, 128, 91, 128, 133, 153, 91,
797 113, 204, 60, 141, 98, 176, 125, 97) },
798 { simde_x_mm_set_epu8(151, 241, 42, 96, 21, 167, 26, 188,
799 124, 136, 158, 144, 227, 152, 4, 152),
800 simde_x_mm_set_epu8( 43, 216, 77, 147, 105, 127, 87, 93,
801 160, 103, 68, 85, 77, 41, 67, 189),
802 simde_x_mm_set_epu8( 97, 229, 60, 122, 63, 147, 57, 141,
803 142, 120, 113, 115, 152, 97, 36, 171) },
804 { simde_x_mm_set_epu8(229, 241, 5, 141, 89, 37, 175, 184,
805 139, 113, 20, 221, 179, 130, 61, 16),
806 simde_x_mm_set_epu8( 74, 70, 240, 235, 217, 244, 23, 139,
807 224, 48, 224, 137, 221, 180, 178, 80),
808 simde_x_mm_set_epu8(152, 156, 123, 188, 153, 141, 99, 162,
809 182, 81, 122, 179, 200, 155, 120, 48) },
810 { simde_x_mm_set_epu8( 30, 40, 139, 23, 169, 60, 77, 114,
811 84, 55, 70, 122, 10, 27, 47, 237),
812 simde_x_mm_set_epu8(133, 159, 246, 175, 239, 136, 111, 216,
813 173, 32, 117, 64, 231, 128, 162, 145),
814 simde_x_mm_set_epu8( 82, 100, 193, 99, 204, 98, 94, 165,
815 129, 44, 94, 93, 121, 78, 105, 191) }
816 };
817
818 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
819 simde__m128i r = simde_mm_avg_epu8(test_vec[i].a, test_vec[i].b);
820 simde_assert_m128i_u8(r, ==, test_vec[i].r);
821 }
822
823 return 0;
824 }
825
826 static int
test_simde_mm_avg_epu16(SIMDE_MUNIT_TEST_ARGS)827 test_simde_mm_avg_epu16(SIMDE_MUNIT_TEST_ARGS) {
828 const struct {
829 simde__m128i a;
830 simde__m128i b;
831 simde__m128i r;
832 } test_vec[8] = {
833 { simde_x_mm_set_epu16( 5850, 55362, 21148, 12084, 65281, 34990, 37768, 27314),
834 simde_x_mm_set_epu16(17137, 57217, 24643, 68, 62023, 59872, 58364, 48988),
835 simde_x_mm_set_epu16(11494, 56290, 22896, 6076, 63652, 47431, 48066, 38151) },
836 { simde_x_mm_set_epu16(56278, 6728, 16184, 51318, 50283, 22638, 47875, 16598),
837 simde_x_mm_set_epu16(11439, 26450, 22464, 46117, 200, 13782, 6417, 5013),
838 simde_x_mm_set_epu16(33859, 16589, 19324, 48718, 25242, 18210, 27146, 10806) },
839 { simde_x_mm_set_epu16(56585, 41680, 21588, 12940, 58949, 45580, 8877, 11322),
840 simde_x_mm_set_epu16( 1134, 16858, 64620, 61832, 9325, 17410, 30986, 30821),
841 simde_x_mm_set_epu16(28860, 29269, 43104, 37386, 34137, 31495, 19932, 21072) },
842 { simde_x_mm_set_epu16(44326, 6907, 17032, 43140, 43764, 37147, 19624, 24961),
843 simde_x_mm_set_epu16(54082, 7517, 59166, 38362, 18444, 59374, 31747, 32567),
844 simde_x_mm_set_epu16(49204, 7212, 38099, 40751, 31104, 48261, 25686, 28764) },
845 { simde_x_mm_set_epu16( 8568, 10500, 57927, 43336, 23763, 20533, 5882, 34847),
846 simde_x_mm_set_epu16(41965, 54962, 7618, 35181, 34501, 10468, 44645, 29346),
847 simde_x_mm_set_epu16(25267, 32731, 32773, 39259, 29132, 15501, 25264, 32097) },
848 { simde_x_mm_set_epu16(38897, 10848, 5543, 6844, 31880, 40592, 58264, 1176),
849 simde_x_mm_set_epu16(11224, 19859, 27007, 22365, 41063, 17493, 19753, 17341),
850 simde_x_mm_set_epu16(25061, 15354, 16275, 14605, 36472, 29043, 39009, 9259) },
851 { simde_x_mm_set_epu16(58865, 1421, 22821, 44984, 35697, 5341, 45954, 15632),
852 simde_x_mm_set_epu16(19014, 61675, 55796, 6027, 57392, 57481, 56756, 45648),
853 simde_x_mm_set_epu16(38940, 31548, 39309, 25506, 46545, 31411, 51355, 30640) },
854 { simde_x_mm_set_epu16( 7720, 35607, 43324, 19826, 21559, 18042, 2587, 12269),
855 simde_x_mm_set_epu16(34207, 63151, 61320, 28632, 44320, 30016, 59264, 41617),
856 simde_x_mm_set_epu16(20964, 49379, 52322, 24229, 32940, 24029, 30926, 26943) }
857 };
858
859 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
860 simde__m128i r = simde_mm_avg_epu16(test_vec[i].a, test_vec[i].b);
861 simde_assert_m128i_u16(r, ==, test_vec[i].r);
862 }
863
864 return 0;
865 }
866
867 static int
test_simde_mm_bslli_si128(SIMDE_MUNIT_TEST_ARGS)868 test_simde_mm_bslli_si128(SIMDE_MUNIT_TEST_ARGS) {
869 simde__m128i a, e, r;
870
871 a = simde_mm_set_epi8(INT8_C( 24), INT8_C( -55), INT8_C( -96), INT8_C( 87),
872 INT8_C( -58), INT8_C(-112), INT8_C( 23), INT8_C(-126),
873 INT8_C( -8), INT8_C( -11), INT8_C( 18), INT8_C( 30),
874 INT8_C( 114), INT8_C( 65), INT8_C( 26), INT8_C(-121));
875
876 e = simde_mm_set_epi8(INT8_C( 24), INT8_C( -55), INT8_C( -96), INT8_C( 87),
877 INT8_C( -58), INT8_C(-112), INT8_C( 23), INT8_C(-126),
878 INT8_C( -8), INT8_C( -11), INT8_C( 18), INT8_C( 30),
879 INT8_C( 114), INT8_C( 65), INT8_C( 26), INT8_C(-121));
880 r = simde_mm_bslli_si128(a, 0);
881 simde_assert_m128i_i8(r, ==, e);
882
883 e = simde_mm_set_epi8(INT8_C( 87), INT8_C( -58), INT8_C(-112), INT8_C( 23),
884 INT8_C(-126), INT8_C( -8), INT8_C( -11), INT8_C( 18),
885 INT8_C( 30), INT8_C( 114), INT8_C( 65), INT8_C( 26),
886 INT8_C(-121), INT8_C( 0), INT8_C( 0), INT8_C( 0));
887 r = simde_mm_bslli_si128(a, 3);
888 simde_assert_m128i_i8(r, ==, e);
889
890 e = simde_mm_set_epi8(INT8_C( 65), INT8_C( 26), INT8_C(-121), INT8_C( 0),
891 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
892 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
893 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0));
894 r = simde_mm_bslli_si128(a, 13);
895 simde_assert_m128i_i8(r, ==, e);
896
897 e = simde_mm_set_epi8(INT8_C( -96), INT8_C( 87), INT8_C( -58), INT8_C(-112),
898 INT8_C( 23), INT8_C(-126), INT8_C( -8), INT8_C( -11),
899 INT8_C( 18), INT8_C( 30), INT8_C( 114), INT8_C( 65),
900 INT8_C( 26), INT8_C(-121), INT8_C( 0), INT8_C( 0));
901 r = simde_mm_bslli_si128(a, 2);
902 simde_assert_m128i_i8(r, ==, e);
903
904 e = simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
905 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
906 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
907 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0));
908 r = simde_mm_bslli_si128(a, 19);
909 simde_assert_m128i_i8(r, ==, e);
910
911 return 0;
912 }
913
914 static int
test_simde_mm_bsrli_si128(SIMDE_MUNIT_TEST_ARGS)915 test_simde_mm_bsrli_si128(SIMDE_MUNIT_TEST_ARGS) {
916 simde__m128i a, e, r;
917
918 a = simde_mm_set_epi8(INT8_C(-121), INT8_C( -58), INT8_C( -15), INT8_C(-115),
919 INT8_C( -97), INT8_C( -96), INT8_C( -74), INT8_C(-113),
920 INT8_C(-121), INT8_C( 99), INT8_C( 126), INT8_C( 113),
921 INT8_C( -29), INT8_C( 114), INT8_C( -65), INT8_C( 9));
922
923 e = simde_mm_set_epi8(INT8_C(-121), INT8_C( -58), INT8_C( -15), INT8_C(-115),
924 INT8_C( -97), INT8_C( -96), INT8_C( -74), INT8_C(-113),
925 INT8_C(-121), INT8_C( 99), INT8_C( 126), INT8_C( 113),
926 INT8_C( -29), INT8_C( 114), INT8_C( -65), INT8_C( 9));
927 r = simde_mm_bsrli_si128(a, 0);
928 simde_assert_m128i_i8(r, ==, e);
929
930 e = simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C(-121),
931 INT8_C( -58), INT8_C( -15), INT8_C(-115), INT8_C( -97),
932 INT8_C( -96), INT8_C( -74), INT8_C(-113), INT8_C(-121),
933 INT8_C( 99), INT8_C( 126), INT8_C( 113), INT8_C( -29));
934 r = simde_mm_bsrli_si128(a, 3);
935 simde_assert_m128i_i8(r, ==, e);
936
937 e = simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
938 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
939 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
940 INT8_C( 0), INT8_C(-121), INT8_C( -58), INT8_C( -15));
941 r = simde_mm_bsrli_si128(a, 13);
942 simde_assert_m128i_i8(r, ==, e);
943
944 e = simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C(-121), INT8_C( -58),
945 INT8_C( -15), INT8_C(-115), INT8_C( -97), INT8_C( -96),
946 INT8_C( -74), INT8_C(-113), INT8_C(-121), INT8_C( 99),
947 INT8_C( 126), INT8_C( 113), INT8_C( -29), INT8_C( 114));
948 r = simde_mm_bsrli_si128(a, 2);
949 simde_assert_m128i_i8(r, ==, e);
950
951 e = simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
952 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
953 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
954 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0));
955 r = simde_mm_bsrli_si128(a, 19);
956 simde_assert_m128i_i8(r, ==, e);
957
958 return 0;
959 }
960
961 static int
test_simde_mm_castpd_ps(SIMDE_MUNIT_TEST_ARGS)962 test_simde_mm_castpd_ps(SIMDE_MUNIT_TEST_ARGS) {
963 const struct {
964 simde__m128d a;
965 simde__m128 r;
966 } test_vec[8] = {
967 { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C( 499.48), SIMDE_FLOAT32_C( 72.83), SIMDE_FLOAT32_C(-420.10), SIMDE_FLOAT32_C( -361.15))),
968 simde_mm_set_ps(SIMDE_FLOAT32_C( 499.48), SIMDE_FLOAT32_C( 72.83), SIMDE_FLOAT32_C(-420.10), SIMDE_FLOAT32_C( -361.15)) },
969 { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C(-412.40), SIMDE_FLOAT32_C(-314.35), SIMDE_FLOAT32_C(-851.03), SIMDE_FLOAT32_C( 128.01))),
970 simde_mm_set_ps(SIMDE_FLOAT32_C(-412.40), SIMDE_FLOAT32_C(-314.35), SIMDE_FLOAT32_C(-851.03), SIMDE_FLOAT32_C( 128.01)) },
971 { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C(-411.93), SIMDE_FLOAT32_C( 780.67), SIMDE_FLOAT32_C(-928.22), SIMDE_FLOAT32_C( 762.24))),
972 simde_mm_set_ps(SIMDE_FLOAT32_C(-411.93), SIMDE_FLOAT32_C( 780.67), SIMDE_FLOAT32_C(-928.22), SIMDE_FLOAT32_C( 762.24)) },
973 { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C(-614.18), SIMDE_FLOAT32_C( 644.19), SIMDE_FLOAT32_C( -41.15), SIMDE_FLOAT32_C( 871.68))),
974 simde_mm_set_ps(SIMDE_FLOAT32_C(-614.18), SIMDE_FLOAT32_C( 644.19), SIMDE_FLOAT32_C( -41.15), SIMDE_FLOAT32_C( 871.68)) },
975 { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C( 795.82), SIMDE_FLOAT32_C( 486.26), SIMDE_FLOAT32_C(-686.59), SIMDE_FLOAT32_C( 277.69))),
976 simde_mm_set_ps(SIMDE_FLOAT32_C( 795.82), SIMDE_FLOAT32_C( 486.26), SIMDE_FLOAT32_C(-686.59), SIMDE_FLOAT32_C( 277.69)) },
977 { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C( 221.74), SIMDE_FLOAT32_C(-655.22), SIMDE_FLOAT32_C(-366.90), SIMDE_FLOAT32_C( -245.25))),
978 simde_mm_set_ps(SIMDE_FLOAT32_C( 221.74), SIMDE_FLOAT32_C(-655.22), SIMDE_FLOAT32_C(-366.90), SIMDE_FLOAT32_C( -245.25)) },
979 { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C( -83.75), SIMDE_FLOAT32_C( 862.26), SIMDE_FLOAT32_C( 55.37), SIMDE_FLOAT32_C( -26.83))),
980 simde_mm_set_ps(SIMDE_FLOAT32_C( -83.75), SIMDE_FLOAT32_C( 862.26), SIMDE_FLOAT32_C( 55.37), SIMDE_FLOAT32_C( -26.83)) },
981 { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C(-557.26), SIMDE_FLOAT32_C(-554.56), SIMDE_FLOAT32_C(-507.07), SIMDE_FLOAT32_C( 395.47))),
982 simde_mm_set_ps(SIMDE_FLOAT32_C(-557.26), SIMDE_FLOAT32_C(-554.56), SIMDE_FLOAT32_C(-507.07), SIMDE_FLOAT32_C( 395.47)) }
983 };
984
985 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
986 simde__m128 r = simde_mm_castpd_ps(test_vec[i].a);
987 simde_assert_m128_close(r, test_vec[i].r, 1);
988 }
989
990 return 0;
991 }
992
993 static int
test_simde_mm_castps_pd(SIMDE_MUNIT_TEST_ARGS)994 test_simde_mm_castps_pd(SIMDE_MUNIT_TEST_ARGS) {
995 const struct {
996 simde__m128 a;
997 simde__m128 r;
998 } test_vec[8] = {
999 { simde_mm_set_ps(SIMDE_FLOAT32_C( -48.11), SIMDE_FLOAT32_C( 66.54), SIMDE_FLOAT32_C( -702.38), SIMDE_FLOAT32_C( -384.97)),
1000 simde_mm_set_ps(SIMDE_FLOAT32_C( -48.11), SIMDE_FLOAT32_C( 66.54), SIMDE_FLOAT32_C( -702.38), SIMDE_FLOAT32_C( -384.97)) },
1001 { simde_mm_set_ps(SIMDE_FLOAT32_C( 975.22), SIMDE_FLOAT32_C( -352.19), SIMDE_FLOAT32_C( -258.03), SIMDE_FLOAT32_C( 978.92)),
1002 simde_mm_set_ps(SIMDE_FLOAT32_C( 975.22), SIMDE_FLOAT32_C( -352.19), SIMDE_FLOAT32_C( -258.03), SIMDE_FLOAT32_C( 978.92)) },
1003 { simde_mm_set_ps(SIMDE_FLOAT32_C( 24.73), SIMDE_FLOAT32_C( -551.11), SIMDE_FLOAT32_C( -52.52), SIMDE_FLOAT32_C( 259.60)),
1004 simde_mm_set_ps(SIMDE_FLOAT32_C( 24.73), SIMDE_FLOAT32_C( -551.11), SIMDE_FLOAT32_C( -52.52), SIMDE_FLOAT32_C( 259.60)) },
1005 { simde_mm_set_ps(SIMDE_FLOAT32_C( 614.82), SIMDE_FLOAT32_C( 711.79), SIMDE_FLOAT32_C( 715.74), SIMDE_FLOAT32_C( 872.89)),
1006 simde_mm_set_ps(SIMDE_FLOAT32_C( 614.82), SIMDE_FLOAT32_C( 711.79), SIMDE_FLOAT32_C( 715.74), SIMDE_FLOAT32_C( 872.89)) },
1007 { simde_mm_set_ps(SIMDE_FLOAT32_C( 434.09), SIMDE_FLOAT32_C( 97.43), SIMDE_FLOAT32_C( 836.69), SIMDE_FLOAT32_C( 490.93)),
1008 simde_mm_set_ps(SIMDE_FLOAT32_C( 434.09), SIMDE_FLOAT32_C( 97.43), SIMDE_FLOAT32_C( 836.69), SIMDE_FLOAT32_C( 490.93)) },
1009 { simde_mm_set_ps(SIMDE_FLOAT32_C( -964.09), SIMDE_FLOAT32_C( 616.34), SIMDE_FLOAT32_C( -267.39), SIMDE_FLOAT32_C( -457.57)),
1010 simde_mm_set_ps(SIMDE_FLOAT32_C( -964.09), SIMDE_FLOAT32_C( 616.34), SIMDE_FLOAT32_C( -267.39), SIMDE_FLOAT32_C( -457.57)) },
1011 { simde_mm_set_ps(SIMDE_FLOAT32_C( -14.24), SIMDE_FLOAT32_C( 802.19), SIMDE_FLOAT32_C( 741.42), SIMDE_FLOAT32_C( -211.48)),
1012 simde_mm_set_ps(SIMDE_FLOAT32_C( -14.24), SIMDE_FLOAT32_C( 802.19), SIMDE_FLOAT32_C( 741.42), SIMDE_FLOAT32_C( -211.48)) },
1013 { simde_mm_set_ps(SIMDE_FLOAT32_C( -739.71), SIMDE_FLOAT32_C( -918.58), SIMDE_FLOAT32_C( -598.92), SIMDE_FLOAT32_C( -924.03)),
1014 simde_mm_set_ps(SIMDE_FLOAT32_C( -739.71), SIMDE_FLOAT32_C( -918.58), SIMDE_FLOAT32_C( -598.92), SIMDE_FLOAT32_C( -924.03)) }
1015 };
1016
1017 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1018 simde__m128 r = simde_mm_castpd_ps(simde_mm_castps_pd(test_vec[i].a));
1019 simde_assert_m128_close(r, test_vec[i].r, 1);
1020 }
1021
1022 return 0;
1023 }
1024
1025 static int
test_simde_mm_castsi128_pd(SIMDE_MUNIT_TEST_ARGS)1026 test_simde_mm_castsi128_pd(SIMDE_MUNIT_TEST_ARGS) {
1027 simde__m128i test_vec[8] = {
1028 simde_mm_set_epi32(INT32_C(-1784593785), INT32_C( 1037253725), INT32_C( 225827038), INT32_C(-2070942389)),
1029 simde_mm_set_epi32(INT32_C( 2006039830), INT32_C( 831495128), INT32_C( 1875760759), INT32_C( 315081037)),
1030 simde_mm_set_epi32(INT32_C( -305750616), INT32_C( 602617399), INT32_C( 1569354160), INT32_C(-1091905770)),
1031 simde_mm_set_epi32(INT32_C(-1852218105), INT32_C(-1464694454), INT32_C(-1287612023), INT32_C( 1418106957)),
1032 simde_mm_set_epi32(INT32_C( 1382189486), INT32_C( 561466363), INT32_C( -455563445), INT32_C( 733917325)),
1033 simde_mm_set_epi32(INT32_C( -187102213), INT32_C( -373894547), INT32_C( 335417846), INT32_C( 400855569)),
1034 simde_mm_set_epi32(INT32_C( 1405293845), INT32_C( -164981292), INT32_C( 180491437), INT32_C( 1551867928)),
1035 simde_mm_set_epi32(INT32_C( 458893421), INT32_C(-1960480477), INT32_C( 1264329759), INT32_C( 1663854158))
1036 };
1037
1038 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1039 simde__m128i r = simde_mm_castpd_si128(simde_mm_castsi128_pd(test_vec[i]));
1040 simde_assert_m128i_equal(r, test_vec[i]);
1041 }
1042
1043 return 0;
1044 }
1045
1046 static int
test_simde_mm_castsi128_ps(SIMDE_MUNIT_TEST_ARGS)1047 test_simde_mm_castsi128_ps(SIMDE_MUNIT_TEST_ARGS) {
1048 simde__m128i test_vec[8] = {
1049 simde_mm_set_epi32(INT32_C(-1036963898), INT32_C( 1847069037), INT32_C( 740321504), INT32_C( 778754840)),
1050 simde_mm_set_epi32(INT32_C( 975137998), INT32_C( -252397546), INT32_C( 1504697866), INT32_C(-1327032545)),
1051 simde_mm_set_epi32(INT32_C(-1494981423), INT32_C( -175189577), INT32_C( 2056595322), INT32_C( 1080531273)),
1052 simde_mm_set_epi32(INT32_C(-1391843620), INT32_C( 424327107), INT32_C( 948927709), INT32_C( -666077781)),
1053 simde_mm_set_epi32(INT32_C( 951847201), INT32_C( -299846327), INT32_C( 575809604), INT32_C(-1150359231)),
1054 simde_mm_set_epi32(INT32_C( 837564377), INT32_C( -933128035), INT32_C( -581372672), INT32_C( -490866291)),
1055 simde_mm_set_epi32(INT32_C( -169157316), INT32_C( 1521943175), INT32_C( 841770394), INT32_C( -192049832)),
1056 simde_mm_set_epi32(INT32_C( -848324384), INT32_C(-1699878899), INT32_C( -332340467), INT32_C( 934012294))
1057 };
1058
1059 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1060 simde__m128i r = simde_mm_castps_si128(simde_mm_castsi128_ps(test_vec[i]));
1061 simde_assert_m128i_i64(r, ==, test_vec[i]);
1062 }
1063
1064 return 0;
1065 }
1066
1067 static int
test_simde_mm_cmpeq_epi8(SIMDE_MUNIT_TEST_ARGS)1068 test_simde_mm_cmpeq_epi8(SIMDE_MUNIT_TEST_ARGS) {
1069 const struct {
1070 simde__m128i a;
1071 simde__m128i b;
1072 simde__m128i r;
1073 } test_vec[8] = {
1074 { simde_mm_set_epi8(INT8_C( 48), INT8_C( -17), INT8_C( 87), INT8_C( -4),
1075 INT8_C( 4), INT8_C( -44), INT8_C( 121), INT8_C( 68),
1076 INT8_C( 49), INT8_C(-108), INT8_C( 49), INT8_C( -79),
1077 INT8_C( 51), INT8_C( -82), INT8_C( 23), INT8_C( -58)),
1078 simde_mm_set_epi8(INT8_C( 87), INT8_C( -42), INT8_C( 33), INT8_C( 126),
1079 INT8_C( 91), INT8_C( 115), INT8_C( -90), INT8_C( 48),
1080 INT8_C( -49), INT8_C(-119), INT8_C( 23), INT8_C( 50),
1081 INT8_C( -10), INT8_C( -15), INT8_C( -16), INT8_C( -58)),
1082 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1083 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1084 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1085 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) },
1086 { simde_mm_set_epi8(INT8_C( 90), INT8_C( -93), INT8_C( 121), INT8_C( 114),
1087 INT8_C( 21), INT8_C( -52), INT8_C( -45), INT8_C( -83),
1088 INT8_C(-123), INT8_C(-119), INT8_C( -53), INT8_C(-117),
1089 INT8_C( -60), INT8_C( -20), INT8_C(-100), INT8_C( 26)),
1090 simde_mm_set_epi8(INT8_C( 67), INT8_C( -34), INT8_C(-110), INT8_C( -79),
1091 INT8_C( -72), INT8_C( -43), INT8_C( 64), INT8_C( -74),
1092 INT8_C( 64), INT8_C( 85), INT8_C( -71), INT8_C( 89),
1093 INT8_C( 35), INT8_C( 81), INT8_C( 104), INT8_C( 111)),
1094 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1095 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1096 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1097 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
1098 { simde_mm_set_epi8(INT8_C( 33), INT8_C( 56), INT8_C( -20), INT8_C( 49),
1099 INT8_C( -77), INT8_C(-123), INT8_C( -77), INT8_C(-109),
1100 INT8_C( -13), INT8_C( 91), INT8_C( 105), INT8_C( 29),
1101 INT8_C( 35), INT8_C( -62), INT8_C( 39), INT8_C( -24)),
1102 simde_mm_set_epi8(INT8_C( -80), INT8_C( -37), INT8_C( 43), INT8_C( 121),
1103 INT8_C(-104), INT8_C( -93), INT8_C(-100), INT8_C( 55),
1104 INT8_C( -82), INT8_C( -92), INT8_C( -6), INT8_C( 2),
1105 INT8_C( -33), INT8_C( 114), INT8_C( -94), INT8_C( 58)),
1106 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1107 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1108 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1109 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
1110 { simde_mm_set_epi8(INT8_C( -50), INT8_C( 20), INT8_C( 99), INT8_C( 4),
1111 INT8_C( 66), INT8_C( -69), INT8_C(-117), INT8_C( 25),
1112 INT8_C( -96), INT8_C( -11), INT8_C( -75), INT8_C( -79),
1113 INT8_C( 88), INT8_C(-123), INT8_C( -55), INT8_C( 22)),
1114 simde_mm_set_epi8(INT8_C( 68), INT8_C(-117), INT8_C(-113), INT8_C( 30),
1115 INT8_C( 0), INT8_C( 65), INT8_C( -61), INT8_C( -31),
1116 INT8_C( -53), INT8_C( -2), INT8_C( -47), INT8_C( 20),
1117 INT8_C( -79), INT8_C(-126), INT8_C( 40), INT8_C( 81)),
1118 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1119 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1120 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1121 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
1122 { simde_mm_set_epi8(INT8_C( 100), INT8_C( -54), INT8_C( -62), INT8_C( -41),
1123 INT8_C(-110), INT8_C( -3), INT8_C(-102), INT8_C( -2),
1124 INT8_C( 26), INT8_C( -67), INT8_C( -67), INT8_C( -73),
1125 INT8_C( 18), INT8_C( 123), INT8_C( 122), INT8_C( 106)),
1126 simde_mm_set_epi8(INT8_C( -5), INT8_C( 2), INT8_C( 119), INT8_C( 28),
1127 INT8_C( -24), INT8_C( 12), INT8_C( 106), INT8_C( -55),
1128 INT8_C( 124), INT8_C( 69), INT8_C( 31), INT8_C(-126),
1129 INT8_C( -80), INT8_C( -78), INT8_C( -93), INT8_C( -23)),
1130 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1131 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1132 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1133 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
1134 { simde_mm_set_epi8(INT8_C( 48), INT8_C( -25), INT8_C( -80), INT8_C( 17),
1135 INT8_C( -70), INT8_C( -40), INT8_C( 83), INT8_C( 37),
1136 INT8_C( 22), INT8_C( -91), INT8_C( -79), INT8_C( 8),
1137 INT8_C( 9), INT8_C( -21), INT8_C( -51), INT8_C( -21)),
1138 simde_mm_set_epi8(INT8_C( 55), INT8_C( 114), INT8_C( -79), INT8_C( -59),
1139 INT8_C( 15), INT8_C( -50), INT8_C( -69), INT8_C( 7),
1140 INT8_C(-113), INT8_C( -95), INT8_C( 112), INT8_C( 5),
1141 INT8_C( -30), INT8_C( -68), INT8_C( -27), INT8_C( -43)),
1142 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1143 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1144 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1145 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
1146 { simde_mm_set_epi8(INT8_C( 34), INT8_C( 111), INT8_C( 52), INT8_C( 93),
1147 INT8_C( -12), INT8_C( 98), INT8_C( -88), INT8_C( 63),
1148 INT8_C( 64), INT8_C( -98), INT8_C( 18), INT8_C( 40),
1149 INT8_C( 119), INT8_C( 68), INT8_C( -90), INT8_C( -37)),
1150 simde_mm_set_epi8(INT8_C( 35), INT8_C( 97), INT8_C( 3), INT8_C( 88),
1151 INT8_C( -70), INT8_C( -12), INT8_C( -13), INT8_C( 52),
1152 INT8_C( 127), INT8_C( -5), INT8_C( -24), INT8_C( -10),
1153 INT8_C( -21), INT8_C(-112), INT8_C( -81), INT8_C( 86)),
1154 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1155 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1156 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1157 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) },
1158 { simde_mm_set_epi8(INT8_C( -25), INT8_C(-100), INT8_C( -66), INT8_C( 103),
1159 INT8_C(-103), INT8_C( 116), INT8_C( -97), INT8_C( -43),
1160 INT8_C( 123), INT8_C( -33), INT8_C( -71), INT8_C(-122),
1161 INT8_C( 100), INT8_C( 116), INT8_C( 67), INT8_C(-119)),
1162 simde_mm_set_epi8(INT8_C( 103), INT8_C( -84), INT8_C( 102), INT8_C( -67),
1163 INT8_C( -82), INT8_C( 14), INT8_C( -17), INT8_C( -71),
1164 INT8_C( -31), INT8_C(-109), INT8_C( -84), INT8_C( -22),
1165 INT8_C( 78), INT8_C(-120), INT8_C( -77), INT8_C( -6)),
1166 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1167 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1168 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1169 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0)) }
1170 };
1171
1172 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1173 simde__m128i r = simde_mm_cmpeq_epi8(test_vec[i].a, test_vec[i].b);
1174 simde_assert_m128i_i8(r, ==, test_vec[i].r);
1175 }
1176
1177 return 0;
1178 }
1179
1180 static int
test_simde_mm_cmpeq_epi16(SIMDE_MUNIT_TEST_ARGS)1181 test_simde_mm_cmpeq_epi16(SIMDE_MUNIT_TEST_ARGS) {
1182 const struct {
1183 simde__m128i a;
1184 simde__m128i b;
1185 simde__m128i r;
1186 } test_vec[8] = {
1187 { simde_mm_set_epi16(INT16_C( 5875), INT16_C(-30240), INT16_C( 4973), INT16_C(-24835),
1188 INT16_C( -2682), INT16_C( 25733), INT16_C( 1837), INT16_C( -8035)),
1189 simde_mm_set_epi16(INT16_C( 5875), INT16_C(-30240), INT16_C( 9332), INT16_C(-24835),
1190 INT16_C( -8998), INT16_C( 25733), INT16_C( 1837), INT16_C(-18483)),
1191 simde_mm_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1),
1192 INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)) },
1193 { simde_mm_set_epi16(INT16_C( -1458), INT16_C( 12290), INT16_C( 394), INT16_C( 6014),
1194 INT16_C( 25725), INT16_C( 16049), INT16_C( -659), INT16_C( 13250)),
1195 simde_mm_set_epi16(INT16_C( -1458), INT16_C( 12290), INT16_C( 394), INT16_C( 6014),
1196 INT16_C( 25725), INT16_C(-30312), INT16_C( -659), INT16_C( 20372)),
1197 simde_mm_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1),
1198 INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0)) },
1199 { simde_mm_set_epi16(INT16_C( -4375), INT16_C( 1648), INT16_C( -8256), INT16_C( 10030),
1200 INT16_C( 20444), INT16_C( -7330), INT16_C( -7889), INT16_C( 23879)),
1201 simde_mm_set_epi16(INT16_C( -644), INT16_C( 1648), INT16_C( -8256), INT16_C( 10030),
1202 INT16_C( 4813), INT16_C( -7330), INT16_C(-10599), INT16_C(-13677)),
1203 simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1),
1204 INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0)) },
1205 { simde_mm_set_epi16(INT16_C(-20962), INT16_C(-30592), INT16_C(-23740), INT16_C( -1360),
1206 INT16_C( 6756), INT16_C( 10080), INT16_C( 31194), INT16_C(-10248)),
1207 simde_mm_set_epi16(INT16_C(-20962), INT16_C(-19403), INT16_C( 31222), INT16_C( 12369),
1208 INT16_C( 10909), INT16_C( 10080), INT16_C( 31194), INT16_C(-10248)),
1209 simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0),
1210 INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)) },
1211 { simde_mm_set_epi16(INT16_C( -5540), INT16_C(-14756), INT16_C(-15994), INT16_C( 1795),
1212 INT16_C( 18849), INT16_C( 15779), INT16_C( 5314), INT16_C(-13448)),
1213 simde_mm_set_epi16(INT16_C( -5540), INT16_C( 14083), INT16_C(-16603), INT16_C( 1795),
1214 INT16_C( 28557), INT16_C(-32040), INT16_C( 5314), INT16_C( -4887)),
1215 simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1),
1216 INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0)) },
1217 { simde_mm_set_epi16(INT16_C(-18621), INT16_C( 6869), INT16_C(-16161), INT16_C(-24568),
1218 INT16_C(-10576), INT16_C( 20065), INT16_C( -8241), INT16_C(-21658)),
1219 simde_mm_set_epi16(INT16_C(-18621), INT16_C( 6869), INT16_C(-10830), INT16_C(-24568),
1220 INT16_C(-10576), INT16_C( 20065), INT16_C( -8094), INT16_C(-21658)),
1221 simde_mm_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1),
1222 INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( -1)) },
1223 { simde_mm_set_epi16(INT16_C(-20765), INT16_C( 27683), INT16_C( 13646), INT16_C( 26224),
1224 INT16_C(-12316), INT16_C( -2556), INT16_C( -1320), INT16_C(-15938)),
1225 simde_mm_set_epi16(INT16_C( -5976), INT16_C( 27683), INT16_C( -6395), INT16_C( 26224),
1226 INT16_C(-12316), INT16_C( -2556), INT16_C( -1320), INT16_C(-15143)),
1227 simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1),
1228 INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0)) },
1229 { simde_mm_set_epi16(INT16_C( 25864), INT16_C( 17430), INT16_C( 25473), INT16_C( 24392),
1230 INT16_C( 27481), INT16_C( 2288), INT16_C( 24811), INT16_C( 18514)),
1231 simde_mm_set_epi16(INT16_C( 25864), INT16_C( 8829), INT16_C( 25473), INT16_C( 24392),
1232 INT16_C( 27481), INT16_C( 4599), INT16_C( 24811), INT16_C( 18514)),
1233 simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1),
1234 INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1)) }
1235 };
1236
1237 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1238 simde__m128i r = simde_mm_cmpeq_epi16(test_vec[i].a, test_vec[i].b);
1239 simde_assert_m128i_i16(r, ==, test_vec[i].r);
1240 }
1241
1242 return 0;
1243 }
1244
1245 static int
test_simde_mm_cmpeq_epi32(SIMDE_MUNIT_TEST_ARGS)1246 test_simde_mm_cmpeq_epi32(SIMDE_MUNIT_TEST_ARGS) {
1247 const struct {
1248 simde__m128i a;
1249 simde__m128i b;
1250 simde__m128i r;
1251 } test_vec[8] = {
1252 { simde_mm_set_epi32( 385059296, 325951229, -175741819, 120447133),
1253 simde_mm_set_epi32( -411118693, 325951229, -175741819, -74467379),
1254 simde_mm_set_epi32( 0, -1, -1, 0) },
1255 { simde_mm_set_epi32( 2086724423, 2071647391, 395772386, -878201179),
1256 simde_mm_set_epi32( 2086724423, 25827198, 1685929649, -43174974),
1257 simde_mm_set_epi32( -1, 0, 0, 0) },
1258 { simde_mm_set_epi32(-1656549033, -529471298, -677159845, -1011499644),
1259 simde_mm_set_epi32(-1108138959, 2008596507, 36966751, -1011499644),
1260 simde_mm_set_epi32( 0, 0, 0, -1) },
1261 { simde_mm_set_epi32( -42154427, 232395060, 315449676, -694564205),
1262 simde_mm_set_epi32(-1781616670, 232395060, 315449676, -694564205),
1263 simde_mm_set_epi32( 0, -1, -1, -1) },
1264 { simde_mm_set_epi32(-1373730688, -1555760464, 442771296, 2044385272),
1265 simde_mm_set_epi32( -819547083, -1555760464, 442771296, 2044385272),
1266 simde_mm_set_epi32( 0, -1, -1, -1) },
1267 { simde_mm_set_epi32( -285007987, 1222927916, -234086536, 711157928),
1268 simde_mm_set_epi32( -285007987, 1222927916, 1235303843, 711157928),
1269 simde_mm_set_epi32( -1, -1, 0, -1) },
1270 { simde_mm_set_epi32( 1734698060, -250509290, -430142591, 970705024),
1271 simde_mm_set_epi32( 1734698060, -1399422252, -430142591, -1199939349),
1272 simde_mm_set_epi32( -1, 0, -1, 0) },
1273 { simde_mm_set_epi32( 1285559999, -709744735, -1852486552, -530433851),
1274 simde_mm_set_epi32( 1285559999, -709744735, -1768521466, -530433851),
1275 simde_mm_set_epi32( -1, -1, 0, -1) }
1276 };
1277
1278 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1279 simde__m128i r = simde_mm_cmpeq_epi32(test_vec[i].a, test_vec[i].b);
1280 simde_assert_m128i_i32(r, ==, test_vec[i].r);
1281 }
1282
1283 return 0;
1284 }
1285
1286 static int
test_simde_mm_cmpeq_pd(SIMDE_MUNIT_TEST_ARGS)1287 test_simde_mm_cmpeq_pd(SIMDE_MUNIT_TEST_ARGS) {
1288 const struct {
1289 simde__m128d a;
1290 simde__m128d b;
1291 simde__m128i r;
1292 } test_vec[8] = {
1293 { simde_mm_set_pd(SIMDE_FLOAT64_C( 780.35), SIMDE_FLOAT64_C( 826.07)),
1294 simde_mm_set_pd(SIMDE_FLOAT64_C( 625.03), SIMDE_FLOAT64_C( 826.07)),
1295 simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C(-1), INT32_C(-1)) },
1296 { simde_mm_set_pd(SIMDE_FLOAT64_C( -334.66), SIMDE_FLOAT64_C( 476.36)),
1297 simde_mm_set_pd(SIMDE_FLOAT64_C( -334.66), SIMDE_FLOAT64_C( 556.75)),
1298 simde_mm_set_epi32(INT32_C(-1), INT32_C(-1), INT32_C( 0), INT32_C( 0)) },
1299 { simde_mm_set_pd(SIMDE_FLOAT64_C( 324.97), SIMDE_FLOAT64_C( 726.67)),
1300 simde_mm_set_pd(SIMDE_FLOAT64_C( -886.03), SIMDE_FLOAT64_C( 556.75)),
1301 simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
1302 { simde_mm_set_pd(SIMDE_FLOAT64_C( 432.42), SIMDE_FLOAT64_C( 208.20)),
1303 simde_mm_set_pd(SIMDE_FLOAT64_C( -886.03), SIMDE_FLOAT64_C( 556.75)),
1304 simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
1305 { simde_mm_set_pd(SIMDE_FLOAT64_C( 361.87), SIMDE_FLOAT64_C( -173.19)),
1306 simde_mm_set_pd(SIMDE_FLOAT64_C( 190.30), SIMDE_FLOAT64_C( -730.35)),
1307 simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
1308 { simde_mm_set_pd(SIMDE_FLOAT64_C( 187.80), SIMDE_FLOAT64_C( -428.45)),
1309 simde_mm_set_pd(SIMDE_FLOAT64_C( 754.99), SIMDE_FLOAT64_C( -730.35)),
1310 simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
1311 { simde_mm_set_pd(SIMDE_FLOAT64_C( 34.65), SIMDE_FLOAT64_C( 814.87)),
1312 simde_mm_set_pd(SIMDE_FLOAT64_C( 105.60), SIMDE_FLOAT64_C( 292.36)),
1313 simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
1314 { simde_mm_set_pd(SIMDE_FLOAT64_C( 221.70), SIMDE_FLOAT64_C( -277.53)),
1315 simde_mm_set_pd(SIMDE_FLOAT64_C( -578.28), SIMDE_FLOAT64_C( 292.36)),
1316 simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }
1317 };
1318
1319 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1320 simde__m128i r = simde_mm_castpd_si128(simde_mm_cmpeq_pd(test_vec[i].a, test_vec[i].b));
1321 simde_assert_m128i_i32(r, ==, test_vec[i].r);
1322 }
1323
1324 return 0;
1325 }
1326
1327 static int
test_simde_mm_cmpeq_sd(SIMDE_MUNIT_TEST_ARGS)1328 test_simde_mm_cmpeq_sd(SIMDE_MUNIT_TEST_ARGS) {
1329 simde__m128d all_unset = simde_mm_setzero_pd();
1330 simde__m128d all_set = simde_mm_cmpeq_pd(all_unset, all_unset);
1331
1332 const struct {
1333 simde__m128d a;
1334 simde__m128d b;
1335 simde__m128d r;
1336 } test_vec[8] = {
1337 { simde_mm_set_pd(SIMDE_FLOAT64_C( 386.34), SIMDE_FLOAT64_C( 460.38)),
1338 simde_mm_set_pd(SIMDE_FLOAT64_C( -116.45), SIMDE_FLOAT64_C( 460.38)),
1339 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 386.34), SIMDE_FLOAT64_C( 460.38)), all_set) },
1340 { simde_mm_set_pd(SIMDE_FLOAT64_C( -493.11), SIMDE_FLOAT64_C( 58.42)),
1341 simde_mm_set_pd(SIMDE_FLOAT64_C( 511.42), SIMDE_FLOAT64_C( 58.42)),
1342 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -493.11), SIMDE_FLOAT64_C( 58.42)), all_set) },
1343 { simde_mm_set_pd(SIMDE_FLOAT64_C( 156.83), SIMDE_FLOAT64_C( -432.98)),
1344 simde_mm_set_pd(SIMDE_FLOAT64_C( 156.83), SIMDE_FLOAT64_C( -422.70)),
1345 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 156.83), SIMDE_FLOAT64_C( -432.98)), all_unset) },
1346 { simde_mm_set_pd(SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -577.06)),
1347 simde_mm_set_pd(SIMDE_FLOAT64_C( 404.92), SIMDE_FLOAT64_C( -577.06)),
1348 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 422.21), SIMDE_FLOAT64_C( -577.06)), all_set) },
1349 { simde_mm_set_pd(SIMDE_FLOAT64_C( -468.33), SIMDE_FLOAT64_C( -68.91)),
1350 simde_mm_set_pd(SIMDE_FLOAT64_C( -638.04), SIMDE_FLOAT64_C( 816.57)),
1351 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -468.33), SIMDE_FLOAT64_C( -68.91)), all_unset) },
1352 { simde_mm_set_pd(SIMDE_FLOAT64_C( 37.53), SIMDE_FLOAT64_C( 339.53)),
1353 simde_mm_set_pd(SIMDE_FLOAT64_C( 37.53), SIMDE_FLOAT64_C( 339.53)),
1354 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 37.53), SIMDE_FLOAT64_C( 339.53)), all_set) },
1355 { simde_mm_set_pd(SIMDE_FLOAT64_C( -872.05), SIMDE_FLOAT64_C( -696.39)),
1356 simde_mm_set_pd(SIMDE_FLOAT64_C( -872.05), SIMDE_FLOAT64_C( -696.39)),
1357 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -872.05), SIMDE_FLOAT64_C( -696.39)), all_set) },
1358 { simde_mm_set_pd(SIMDE_FLOAT64_C( 251.77), SIMDE_FLOAT64_C( -366.11)),
1359 simde_mm_set_pd(SIMDE_FLOAT64_C( 251.77), SIMDE_FLOAT64_C( -622.95)),
1360 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 251.77), SIMDE_FLOAT64_C( -366.11)), all_unset) }
1361 };
1362
1363 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1364 simde__m128d r = simde_mm_cmpeq_sd(test_vec[i].a, test_vec[i].b);
1365 simde_assert_m128d_equal(r, test_vec[i].r);
1366 }
1367
1368 return 0;
1369 }
1370
1371 static int
test_simde_mm_cmpneq_pd(SIMDE_MUNIT_TEST_ARGS)1372 test_simde_mm_cmpneq_pd(SIMDE_MUNIT_TEST_ARGS) {
1373 const struct {
1374 simde__m128d a;
1375 simde__m128d b;
1376 simde__m128i r;
1377 } test_vec[8] = {
1378 { simde_mm_set_pd(SIMDE_FLOAT64_C( -17.42), SIMDE_FLOAT64_C( -471.42)),
1379 simde_mm_set_pd(SIMDE_FLOAT64_C( -120.90), SIMDE_FLOAT64_C( -471.42)),
1380 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1381 { simde_mm_set_pd(SIMDE_FLOAT64_C( 174.13), SIMDE_FLOAT64_C( 302.06)),
1382 simde_mm_set_pd(SIMDE_FLOAT64_C( -462.00), SIMDE_FLOAT64_C( 302.06)),
1383 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1384 { simde_mm_set_pd(SIMDE_FLOAT64_C( 456.61), SIMDE_FLOAT64_C( -31.59)),
1385 simde_mm_set_pd(SIMDE_FLOAT64_C( 456.61), SIMDE_FLOAT64_C( -31.59)),
1386 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1387 { simde_mm_set_pd(SIMDE_FLOAT64_C( 863.75), SIMDE_FLOAT64_C( 743.68)),
1388 simde_mm_set_pd(SIMDE_FLOAT64_C( 863.75), SIMDE_FLOAT64_C( -940.38)),
1389 simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) },
1390 { simde_mm_set_pd(SIMDE_FLOAT64_C( -207.80), SIMDE_FLOAT64_C( 181.86)),
1391 simde_mm_set_pd(SIMDE_FLOAT64_C( -207.80), SIMDE_FLOAT64_C( 980.93)),
1392 simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) },
1393 { simde_mm_set_pd(SIMDE_FLOAT64_C( -330.41), SIMDE_FLOAT64_C( 936.80)),
1394 simde_mm_set_pd(SIMDE_FLOAT64_C( -223.97), SIMDE_FLOAT64_C( 936.80)),
1395 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1396 { simde_mm_set_pd(SIMDE_FLOAT64_C( -293.06), SIMDE_FLOAT64_C( -978.73)),
1397 simde_mm_set_pd(SIMDE_FLOAT64_C( -858.76), SIMDE_FLOAT64_C( -978.73)),
1398 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1399 { simde_mm_set_pd(SIMDE_FLOAT64_C( 215.10), SIMDE_FLOAT64_C( -720.29)),
1400 simde_mm_set_pd(SIMDE_FLOAT64_C( -813.22), SIMDE_FLOAT64_C( 235.59)),
1401 simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) }
1402 };
1403
1404 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1405 simde__m128i r = simde_mm_castpd_si128(simde_mm_cmpneq_pd(test_vec[i].a, test_vec[i].b));
1406 simde_assert_m128i_i64(r, ==, test_vec[i].r);
1407 }
1408
1409 return 0;
1410 }
1411
1412 static int
test_simde_mm_cmpneq_sd(SIMDE_MUNIT_TEST_ARGS)1413 test_simde_mm_cmpneq_sd(SIMDE_MUNIT_TEST_ARGS) {
1414 const struct {
1415 simde__m128d a;
1416 simde__m128d b;
1417 simde__m128d r;
1418 } test_vec[8] = {
1419 { simde_mm_set_pd(SIMDE_FLOAT64_C( 489.91), SIMDE_FLOAT64_C( 496.15)),
1420 simde_mm_set_pd(SIMDE_FLOAT64_C( -40.59), SIMDE_FLOAT64_C( 496.15)),
1421 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 489.91), SIMDE_FLOAT64_C( 496.15)), simde_mm_setzero_pd()) },
1422 { simde_mm_set_pd(SIMDE_FLOAT64_C( -981.34), SIMDE_FLOAT64_C( 944.87)),
1423 simde_mm_set_pd(SIMDE_FLOAT64_C( -433.21), SIMDE_FLOAT64_C( 882.20)),
1424 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -981.34), SIMDE_FLOAT64_C( 944.87)), simde_x_mm_setone_pd()) },
1425 { simde_mm_set_pd(SIMDE_FLOAT64_C( 983.93), SIMDE_FLOAT64_C( 764.39)),
1426 simde_mm_set_pd(SIMDE_FLOAT64_C( 621.75), SIMDE_FLOAT64_C( 764.39)),
1427 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 983.93), SIMDE_FLOAT64_C( 764.39)), simde_mm_setzero_pd()) },
1428 { simde_mm_set_pd(SIMDE_FLOAT64_C( 274.45), SIMDE_FLOAT64_C( 789.62)),
1429 simde_mm_set_pd(SIMDE_FLOAT64_C( 274.45), SIMDE_FLOAT64_C( 789.62)),
1430 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 274.45), SIMDE_FLOAT64_C( 789.62)), simde_mm_setzero_pd()) },
1431 { simde_mm_set_pd(SIMDE_FLOAT64_C( -35.27), SIMDE_FLOAT64_C( 92.02)),
1432 simde_mm_set_pd(SIMDE_FLOAT64_C( -35.27), SIMDE_FLOAT64_C( 92.02)),
1433 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -35.27), SIMDE_FLOAT64_C( 92.02)), simde_mm_setzero_pd()) },
1434 { simde_mm_set_pd(SIMDE_FLOAT64_C( -842.06), SIMDE_FLOAT64_C( -358.82)),
1435 simde_mm_set_pd(SIMDE_FLOAT64_C( 290.56), SIMDE_FLOAT64_C( 859.30)),
1436 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -842.06), SIMDE_FLOAT64_C( -358.82)), simde_x_mm_setone_pd()) },
1437 { simde_mm_set_pd(SIMDE_FLOAT64_C( -575.93), SIMDE_FLOAT64_C( -661.58)),
1438 simde_mm_set_pd(SIMDE_FLOAT64_C( -462.75), SIMDE_FLOAT64_C( 732.75)),
1439 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -575.93), SIMDE_FLOAT64_C( -661.58)), simde_x_mm_setone_pd()) },
1440 { simde_mm_set_pd(SIMDE_FLOAT64_C( -876.76), SIMDE_FLOAT64_C( -235.41)),
1441 simde_mm_set_pd(SIMDE_FLOAT64_C( 264.94), SIMDE_FLOAT64_C( 767.34)),
1442 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -876.76), SIMDE_FLOAT64_C( -235.41)), simde_x_mm_setone_pd()) }
1443 };
1444
1445 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1446 simde__m128d r = simde_mm_cmpneq_sd(test_vec[i].a, test_vec[i].b);
1447 simde_assert_m128d_equal(r, test_vec[i].r);
1448 }
1449
1450 return 0;
1451 }
1452
1453 static int
test_simde_mm_cmplt_epi8(SIMDE_MUNIT_TEST_ARGS)1454 test_simde_mm_cmplt_epi8(SIMDE_MUNIT_TEST_ARGS) {
1455 const struct {
1456 simde__m128i a;
1457 simde__m128i b;
1458 simde__m128i r;
1459 } test_vec[8] = {
1460 { simde_mm_set_epi8(INT8_C( -98), INT8_C( 126), INT8_C( -78), INT8_C( -97),
1461 INT8_C( -35), INT8_C( -49), INT8_C( -62), INT8_C( -8),
1462 INT8_C( -88), INT8_C( 71), INT8_C( 16), INT8_C( -4),
1463 INT8_C( 69), INT8_C( -61), INT8_C( 47), INT8_C( 84)),
1464 simde_mm_set_epi8(INT8_C( 5), INT8_C(-114), INT8_C( -27), INT8_C( -61),
1465 INT8_C( 56), INT8_C( 115), INT8_C( -53), INT8_C( 16),
1466 INT8_C( -80), INT8_C( -18), INT8_C( 83), INT8_C( -9),
1467 INT8_C( -3), INT8_C( 36), INT8_C( -57), INT8_C( 89)),
1468 simde_mm_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1),
1469 INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1),
1470 INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0),
1471 INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1)) },
1472 { simde_mm_set_epi8(INT8_C( 94), INT8_C( 78), INT8_C( -26), INT8_C(-126),
1473 INT8_C( -98), INT8_C( 65), INT8_C( 38), INT8_C( -71),
1474 INT8_C( -54), INT8_C( 20), INT8_C( -52), INT8_C( 55),
1475 INT8_C( -76), INT8_C( 37), INT8_C( -95), INT8_C( 91)),
1476 simde_mm_set_epi8(INT8_C( -10), INT8_C( -43), INT8_C( 70), INT8_C( -4),
1477 INT8_C( -89), INT8_C( -31), INT8_C( -61), INT8_C( 81),
1478 INT8_C( 64), INT8_C( -78), INT8_C( 14), INT8_C( 125),
1479 INT8_C( 81), INT8_C( 62), INT8_C(-124), INT8_C( 39)),
1480 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1),
1481 INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1),
1482 INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1),
1483 INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0)) },
1484 { simde_mm_set_epi8(INT8_C( 103), INT8_C( 49), INT8_C( 24), INT8_C( 0),
1485 INT8_C( -50), INT8_C( 64), INT8_C( 11), INT8_C( 101),
1486 INT8_C( 39), INT8_C( 41), INT8_C(-111), INT8_C( -32),
1487 INT8_C( 91), INT8_C( 86), INT8_C(-117), INT8_C( 115)),
1488 simde_mm_set_epi8(INT8_C( -44), INT8_C( 47), INT8_C( -14), INT8_C( 109),
1489 INT8_C( 44), INT8_C( 97), INT8_C( -41), INT8_C( 53),
1490 INT8_C(-121), INT8_C( -57), INT8_C( 54), INT8_C( 124),
1491 INT8_C( 50), INT8_C( -73), INT8_C( -30), INT8_C( -62)),
1492 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1),
1493 INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0),
1494 INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1),
1495 INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) },
1496 { simde_mm_set_epi8(INT8_C(-110), INT8_C( 5), INT8_C( 41), INT8_C( -3),
1497 INT8_C(-114), INT8_C( 14), INT8_C(-117), INT8_C( -89),
1498 INT8_C( 52), INT8_C( 62), INT8_C( 41), INT8_C( -25),
1499 INT8_C( 114), INT8_C( 56), INT8_C( 58), INT8_C( -99)),
1500 simde_mm_set_epi8(INT8_C( -31), INT8_C( -36), INT8_C(-126), INT8_C( -69),
1501 INT8_C( 113), INT8_C( -30), INT8_C( -24), INT8_C( 69),
1502 INT8_C( -15), INT8_C(-110), INT8_C( 23), INT8_C( 87),
1503 INT8_C(-127), INT8_C( -64), INT8_C( -38), INT8_C( -83)),
1504 simde_mm_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1505 INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1),
1506 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1),
1507 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) },
1508 { simde_mm_set_epi8(INT8_C( -38), INT8_C( -67), INT8_C( -79), INT8_C( -41),
1509 INT8_C(-114), INT8_C( 37), INT8_C( -71), INT8_C( 11),
1510 INT8_C( 105), INT8_C( 102), INT8_C( 48), INT8_C( 127),
1511 INT8_C( 84), INT8_C( 115), INT8_C(-102), INT8_C( -24)),
1512 simde_mm_set_epi8(INT8_C( 94), INT8_C( -20), INT8_C( -97), INT8_C( -2),
1513 INT8_C(-113), INT8_C( 46), INT8_C( 123), INT8_C( -9),
1514 INT8_C( 35), INT8_C( -47), INT8_C( 90), INT8_C( -73),
1515 INT8_C(-122), INT8_C( -3), INT8_C(-116), INT8_C( -4)),
1516 simde_mm_set_epi8(INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1),
1517 INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0),
1518 INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0),
1519 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) },
1520 { simde_mm_set_epi8(INT8_C( 27), INT8_C( 41), INT8_C( 36), INT8_C( -97),
1521 INT8_C( -84), INT8_C( 108), INT8_C( -37), INT8_C( -69),
1522 INT8_C( -29), INT8_C( 45), INT8_C( 101), INT8_C( 104),
1523 INT8_C( 102), INT8_C( -85), INT8_C( 3), INT8_C( 124)),
1524 simde_mm_set_epi8(INT8_C(-119), INT8_C( 16), INT8_C( -80), INT8_C( 97),
1525 INT8_C( 97), INT8_C( -44), INT8_C( 71), INT8_C( -43),
1526 INT8_C( 39), INT8_C( -54), INT8_C( 15), INT8_C( -61),
1527 INT8_C( 100), INT8_C( -92), INT8_C( 5), INT8_C( -93)),
1528 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1),
1529 INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1),
1530 INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1531 INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) },
1532 { simde_mm_set_epi8(INT8_C( 53), INT8_C( 82), INT8_C( -6), INT8_C( 99),
1533 INT8_C( 95), INT8_C( -34), INT8_C( -90), INT8_C( -14),
1534 INT8_C( -43), INT8_C( -72), INT8_C( -83), INT8_C(-104),
1535 INT8_C( -1), INT8_C( -60), INT8_C( 103), INT8_C( -66)),
1536 simde_mm_set_epi8(INT8_C( 13), INT8_C( 118), INT8_C( 25), INT8_C( 60),
1537 INT8_C( -83), INT8_C( -43), INT8_C( 90), INT8_C( 54),
1538 INT8_C( -84), INT8_C(-125), INT8_C( -41), INT8_C( 52),
1539 INT8_C( 18), INT8_C( 46), INT8_C( 126), INT8_C( -65)),
1540 simde_mm_set_epi8(INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0),
1541 INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1),
1542 INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1),
1543 INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1)) },
1544 { simde_mm_set_epi8(INT8_C( 43), INT8_C( 123), INT8_C( -13), INT8_C( 35),
1545 INT8_C(-119), INT8_C( 53), INT8_C( -35), INT8_C( -46),
1546 INT8_C( 44), INT8_C( 69), INT8_C( 50), INT8_C(-120),
1547 INT8_C( 2), INT8_C( 50), INT8_C( -95), INT8_C( 46)),
1548 simde_mm_set_epi8(INT8_C( -57), INT8_C( -76), INT8_C(-104), INT8_C(-127),
1549 INT8_C( -27), INT8_C( 127), INT8_C( 127), INT8_C(-109),
1550 INT8_C( 40), INT8_C( -63), INT8_C( 87), INT8_C( -27),
1551 INT8_C( -1), INT8_C(-101), INT8_C( 11), INT8_C( 44)),
1552 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
1553 INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0),
1554 INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1),
1555 INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) }
1556 };
1557
1558 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
1559 simde__m128i r = simde_mm_cmplt_epi8(test_vec[i].a, test_vec[i].b);
1560 simde_assert_m128i_i8(r, ==, test_vec[i].r);
1561 }
1562
1563 return 0;
1564 }
1565
1566 static int
test_simde_mm_cmplt_epi16(SIMDE_MUNIT_TEST_ARGS)1567 test_simde_mm_cmplt_epi16(SIMDE_MUNIT_TEST_ARGS) {
1568 const struct {
1569 simde__m128i a;
1570 simde__m128i b;
1571 simde__m128i r;
1572 } test_vec[8] = {
1573 { simde_mm_set_epi16(INT16_C(-19152), INT16_C( 7219), INT16_C( 8875), INT16_C(-12109),
1574 INT16_C( -6164), INT16_C(-29571), INT16_C( 29544), INT16_C( 12828)),
1575 simde_mm_set_epi16(INT16_C(-19152), INT16_C( -1176), INT16_C(-32721), INT16_C( 28268),
1576 INT16_C( 28536), INT16_C(-24890), INT16_C(-20501), INT16_C( 12828)),
1577 simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1),
1578 INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0)) },
1579 { simde_mm_set_epi16(INT16_C( -385), INT16_C( 10411), INT16_C( -4671), INT16_C( 18534),
1580 INT16_C( 18234), INT16_C( 8064), INT16_C(-32746), INT16_C( 1460)),
1581 simde_mm_set_epi16(INT16_C(-11261), INT16_C( 19475), INT16_C( -4671), INT16_C(-23700),
1582 INT16_C( 8656), INT16_C( 8064), INT16_C(-28801), INT16_C( 5582)),
1583 simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0),
1584 INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1)) },
1585 { simde_mm_set_epi16(INT16_C( 23112), INT16_C( 21760), INT16_C(-29652), INT16_C( -7707),
1586 INT16_C( 4438), INT16_C(-14112), INT16_C( 617), INT16_C(-29125)),
1587 simde_mm_set_epi16(INT16_C(-20847), INT16_C(-17750), INT16_C( 7413), INT16_C( 13270),
1588 INT16_C( 30220), INT16_C(-14112), INT16_C( -140), INT16_C( 23495)),
1589 simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1),
1590 INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1)) },
1591 { simde_mm_set_epi16(INT16_C( 12262), INT16_C(-26458), INT16_C(-17793), INT16_C( 15097),
1592 INT16_C(-28884), INT16_C( -39), INT16_C( 29206), INT16_C( 24614)),
1593 simde_mm_set_epi16(INT16_C(-12392), INT16_C(-30769), INT16_C(-17793), INT16_C( 15097),
1594 INT16_C( 22525), INT16_C( 7510), INT16_C( 28529), INT16_C( -9470)),
1595 simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0),
1596 INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0)) },
1597 { simde_mm_set_epi16(INT16_C( 14581), INT16_C( 30465), INT16_C( 26611), INT16_C(-25355),
1598 INT16_C( 12222), INT16_C(-12322), INT16_C( 176), INT16_C( -4760)),
1599 simde_mm_set_epi16(INT16_C( 10242), INT16_C( 15750), INT16_C(-11513), INT16_C( 7111),
1600 INT16_C(-29171), INT16_C(-12322), INT16_C( 176), INT16_C( -4760)),
1601 simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1),
1602 INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
1603 { simde_mm_set_epi16(INT16_C(-12610), INT16_C( 30415), INT16_C(-22748), INT16_C( 8564),
1604 INT16_C(-28400), INT16_C(-22984), INT16_C(-31130), INT16_C( 2400)),
1605 simde_mm_set_epi16(INT16_C( 17489), INT16_C(-18807), INT16_C( 19401), INT16_C( -73),
1606 INT16_C(-28400), INT16_C( -7356), INT16_C( 31412), INT16_C( 2400)),
1607 simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0),
1608 INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)) },
1609 { simde_mm_set_epi16(INT16_C( 5617), INT16_C( -8984), INT16_C( 20729), INT16_C( 15025),
1610 INT16_C(-12038), INT16_C(-32017), INT16_C(-24693), INT16_C( -3874)),
1611 simde_mm_set_epi16(INT16_C( -8219), INT16_C( 19022), INT16_C(-28515), INT16_C( 15025),
1612 INT16_C( -2982), INT16_C( -314), INT16_C( 16536), INT16_C(-17813)),
1613 simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0),
1614 INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( 0)) },
1615 { simde_mm_set_epi16(INT16_C( 3028), INT16_C( 25056), INT16_C(-30420), INT16_C( 3400),
1616 INT16_C( 27498), INT16_C(-24168), INT16_C(-10264), INT16_C( -5651)),
1617 simde_mm_set_epi16(INT16_C( 16763), INT16_C( 3971), INT16_C(-30420), INT16_C(-13950),
1618 INT16_C( 26793), INT16_C(-27284), INT16_C( 22512), INT16_C(-19434)),
1619 simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0),
1620 INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0)) }
1621 };
1622
1623 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1624 simde__m128i r = simde_mm_cmplt_epi16(test_vec[i].a, test_vec[i].b);
1625 simde_assert_m128i_i16(r, ==, test_vec[i].r);
1626 }
1627
1628 return 0;
1629 }
1630
1631 static int
test_simde_mm_cmplt_epi32(SIMDE_MUNIT_TEST_ARGS)1632 test_simde_mm_cmplt_epi32(SIMDE_MUNIT_TEST_ARGS) {
1633 const struct {
1634 simde__m128i a;
1635 simde__m128i b;
1636 simde__m128i r;
1637 } test_vec[8] = {
1638 { simde_mm_set_epi32(-1255138253, 581685427, -403927939, 1936208412),
1639 simde_mm_set_epi32( 212007784, -2144375188, 1870175942, 1936208412),
1640 simde_mm_set_epi32( -1, 0, -1, 0) },
1641 { simde_mm_set_epi32(-1412605706, -573136614, -789373589, 1859272017),
1642 simde_mm_set_epi32(-1412605706, -306100122, 1194991488, -2146040396),
1643 simde_mm_set_epi32( 0, -1, -1, 0) },
1644 { simde_mm_set_epi32(-1857828629, -865462431, 1845130162, -790702535),
1645 simde_mm_set_epi32( 1020632409, -786544507, 219144900, 222814568),
1646 simde_mm_set_epi32( -1, -1, 0, -1) },
1647 { simde_mm_set_epi32(-1366181206, 485831638, 1980524634, -9151545),
1648 simde_mm_set_epi32(-1932199485, 327347510, 706051828, -541415230),
1649 simde_mm_set_epi32( 0, 0, 0, 0) },
1650 { simde_mm_set_epi32( 803641510, -1166066951, -1892876327, 1914069030),
1651 simde_mm_set_epi32( -812087345, -1002684270, 1476205910, 1869732610),
1652 simde_mm_set_epi32( 0, -1, -1, 0) },
1653 { simde_mm_set_epi32(-1773657387, -1529382252, 1397468980, 1171964570),
1654 simde_mm_set_epi32( 955610881, 1744018677, 801034206, 1171964570),
1655 simde_mm_set_epi32( -1, -1, 0, 0) },
1656 { simde_mm_set_epi32(-1807229965, -1210178631, 1522043695, -1735369601),
1657 simde_mm_set_epi32(-1560329504, 1101415557, 1311721597, 1371106332),
1658 simde_mm_set_epi32( -1, -1, 0, -1) },
1659 { simde_mm_set_epi32( 1146205833, 1271529399, 1661264708, 2058651784),
1660 simde_mm_set_epi32( 624079870, 1320739553, -1066082248, -1119644266),
1661 simde_mm_set_epi32( 0, -1, 0, 0) }
1662 };
1663
1664 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1665 simde__m128i r = simde_mm_cmplt_epi32(test_vec[i].a, test_vec[i].b);
1666 simde_assert_m128i_i32(r, ==, test_vec[i].r);
1667 }
1668
1669 return 0;
1670 }
1671
1672 static int
test_simde_mm_cmplt_pd(SIMDE_MUNIT_TEST_ARGS)1673 test_simde_mm_cmplt_pd(SIMDE_MUNIT_TEST_ARGS) {
1674 const struct {
1675 simde__m128d a;
1676 simde__m128d b;
1677 simde__m128i r;
1678 } test_vec[8] = {
1679 { simde_mm_set_pd(SIMDE_FLOAT64_C( 415.53), SIMDE_FLOAT64_C( -98.38)),
1680 simde_mm_set_pd(SIMDE_FLOAT64_C( 415.53), SIMDE_FLOAT64_C( -729.13)),
1681 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1682 { simde_mm_set_pd(SIMDE_FLOAT64_C( -102.02), SIMDE_FLOAT64_C( -129.13)),
1683 simde_mm_set_pd(SIMDE_FLOAT64_C( 345.59), SIMDE_FLOAT64_C( -901.28)),
1684 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1685 { simde_mm_set_pd(SIMDE_FLOAT64_C( 733.11), SIMDE_FLOAT64_C( 268.99)),
1686 simde_mm_set_pd(SIMDE_FLOAT64_C( 733.11), SIMDE_FLOAT64_C( 632.42)),
1687 simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) },
1688 { simde_mm_set_pd(SIMDE_FLOAT64_C( 988.26), SIMDE_FLOAT64_C( 0.67)),
1689 simde_mm_set_pd(SIMDE_FLOAT64_C( -735.83), SIMDE_FLOAT64_C( 857.46)),
1690 simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) },
1691 { simde_mm_set_pd(SIMDE_FLOAT64_C( 631.80), SIMDE_FLOAT64_C( -84.12)),
1692 simde_mm_set_pd(SIMDE_FLOAT64_C( 596.99), SIMDE_FLOAT64_C( -84.12)),
1693 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1694 { simde_mm_set_pd(SIMDE_FLOAT64_C( 633.74), SIMDE_FLOAT64_C( 134.88)),
1695 simde_mm_set_pd(SIMDE_FLOAT64_C( -981.15), SIMDE_FLOAT64_C( -897.95)),
1696 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1697 { simde_mm_set_pd(SIMDE_FLOAT64_C( 995.74), SIMDE_FLOAT64_C( -864.54)),
1698 simde_mm_set_pd(SIMDE_FLOAT64_C( -773.77), SIMDE_FLOAT64_C( -294.67)),
1699 simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) },
1700 { simde_mm_set_pd(SIMDE_FLOAT64_C( -847.57), SIMDE_FLOAT64_C( 363.82)),
1701 simde_mm_set_pd(SIMDE_FLOAT64_C( 743.31), SIMDE_FLOAT64_C( -671.22)),
1702 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }
1703 };
1704
1705 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1706 simde__m128i r = simde_mm_castpd_si128(simde_mm_cmplt_pd(test_vec[i].a, test_vec[i].b));
1707 simde_assert_m128i_i64(r, ==, test_vec[i].r);
1708 }
1709
1710 return 0;
1711 }
1712
1713 static int
test_simde_mm_cmplt_sd(SIMDE_MUNIT_TEST_ARGS)1714 test_simde_mm_cmplt_sd(SIMDE_MUNIT_TEST_ARGS) {
1715 const struct {
1716 simde__m128d a;
1717 simde__m128d b;
1718 simde__m128d r;
1719 } test_vec[8] = {
1720 { simde_mm_set_pd(SIMDE_FLOAT64_C( 884.89), SIMDE_FLOAT64_C( -700.86)),
1721 simde_mm_set_pd(SIMDE_FLOAT64_C( 194.09), SIMDE_FLOAT64_C( 342.08)),
1722 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 884.89), SIMDE_FLOAT64_C( -700.86)), simde_x_mm_setone_pd()) },
1723 { simde_mm_set_pd(SIMDE_FLOAT64_C( -552.29), SIMDE_FLOAT64_C( -477.43)),
1724 simde_mm_set_pd(SIMDE_FLOAT64_C( -288.53), SIMDE_FLOAT64_C( -439.96)),
1725 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -552.29), SIMDE_FLOAT64_C( -477.43)), simde_x_mm_setone_pd()) },
1726 { simde_mm_set_pd(SIMDE_FLOAT64_C( -25.82), SIMDE_FLOAT64_C( -940.19)),
1727 simde_mm_set_pd(SIMDE_FLOAT64_C( 251.57), SIMDE_FLOAT64_C( 618.81)),
1728 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -25.82), SIMDE_FLOAT64_C( -940.19)), simde_x_mm_setone_pd()) },
1729 { simde_mm_set_pd(SIMDE_FLOAT64_C( -905.83), SIMDE_FLOAT64_C( 120.16)),
1730 simde_mm_set_pd(SIMDE_FLOAT64_C( -235.64), SIMDE_FLOAT64_C( -293.77)),
1731 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -905.83), SIMDE_FLOAT64_C( 120.16)), simde_mm_setzero_pd()) },
1732 { simde_mm_set_pd(SIMDE_FLOAT64_C( 804.90), SIMDE_FLOAT64_C( 266.33)),
1733 simde_mm_set_pd(SIMDE_FLOAT64_C( -104.58), SIMDE_FLOAT64_C( -965.81)),
1734 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 804.90), SIMDE_FLOAT64_C( 266.33)), simde_mm_setzero_pd()) },
1735 { simde_mm_set_pd(SIMDE_FLOAT64_C( 44.09), SIMDE_FLOAT64_C( -365.90)),
1736 simde_mm_set_pd(SIMDE_FLOAT64_C( 534.45), SIMDE_FLOAT64_C( -718.87)),
1737 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 44.09), SIMDE_FLOAT64_C( -365.90)), simde_mm_setzero_pd()) },
1738 { simde_mm_set_pd(SIMDE_FLOAT64_C( -108.03), SIMDE_FLOAT64_C( 233.20)),
1739 simde_mm_set_pd(SIMDE_FLOAT64_C( -420.51), SIMDE_FLOAT64_C( -879.83)),
1740 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -108.03), SIMDE_FLOAT64_C( 233.20)), simde_mm_setzero_pd()) },
1741 { simde_mm_set_pd(SIMDE_FLOAT64_C( -399.23), SIMDE_FLOAT64_C( 758.04)),
1742 simde_mm_set_pd(SIMDE_FLOAT64_C( -334.35), SIMDE_FLOAT64_C( -250.33)),
1743 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -399.23), SIMDE_FLOAT64_C( 758.04)), simde_mm_setzero_pd()) }
1744 };
1745
1746 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1747 simde__m128d r = simde_mm_cmplt_sd(test_vec[i].a, test_vec[i].b);
1748 simde_assert_m128d_equal(r, test_vec[i].r);
1749 }
1750
1751 return 0;
1752 }
1753
1754 static int
test_simde_mm_cmpnlt_pd(SIMDE_MUNIT_TEST_ARGS)1755 test_simde_mm_cmpnlt_pd(SIMDE_MUNIT_TEST_ARGS) {
1756 const struct {
1757 simde__m128d a;
1758 simde__m128d b;
1759 simde__m128i r;
1760 } test_vec[8] = {
1761 { simde_mm_set_pd(SIMDE_FLOAT64_C( 852.01), SIMDE_FLOAT64_C( -875.21)),
1762 simde_mm_set_pd(SIMDE_FLOAT64_C( 852.01), SIMDE_FLOAT64_C( -124.49)),
1763 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1764 { simde_mm_set_pd(SIMDE_FLOAT64_C( 396.43), SIMDE_FLOAT64_C( -754.13)),
1765 simde_mm_set_pd(SIMDE_FLOAT64_C( 396.43), SIMDE_FLOAT64_C( -446.22)),
1766 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1767 { simde_mm_set_pd(SIMDE_FLOAT64_C( 253.13), SIMDE_FLOAT64_C( 198.68)),
1768 simde_mm_set_pd(SIMDE_FLOAT64_C( 253.13), SIMDE_FLOAT64_C( 828.60)),
1769 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1770 { simde_mm_set_pd(SIMDE_FLOAT64_C( 137.84), SIMDE_FLOAT64_C( -995.54)),
1771 simde_mm_set_pd(SIMDE_FLOAT64_C( 137.84), SIMDE_FLOAT64_C( -366.89)),
1772 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1773 { simde_mm_set_pd(SIMDE_FLOAT64_C( 981.94), SIMDE_FLOAT64_C( -371.83)),
1774 simde_mm_set_pd(SIMDE_FLOAT64_C( -999.24), SIMDE_FLOAT64_C( 567.77)),
1775 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1776 { simde_mm_set_pd(SIMDE_FLOAT64_C( 471.23), SIMDE_FLOAT64_C( -984.85)),
1777 simde_mm_set_pd(SIMDE_FLOAT64_C( -365.65), SIMDE_FLOAT64_C( 102.67)),
1778 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1779 { simde_mm_set_pd(SIMDE_FLOAT64_C( -625.86), SIMDE_FLOAT64_C( -91.22)),
1780 simde_mm_set_pd(SIMDE_FLOAT64_C( -928.96), SIMDE_FLOAT64_C( -311.29)),
1781 simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) },
1782 { simde_mm_set_pd(SIMDE_FLOAT64_C( -444.22), SIMDE_FLOAT64_C( 458.27)),
1783 simde_mm_set_pd(SIMDE_FLOAT64_C( 882.56), SIMDE_FLOAT64_C( 290.13)),
1784 simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) }
1785 };
1786
1787 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1788 simde__m128i r = simde_mm_castpd_si128(simde_mm_cmpnlt_pd(test_vec[i].a, test_vec[i].b));
1789 simde_assert_m128i_i64(r, ==, test_vec[i].r);
1790 }
1791
1792 return 0;
1793 }
1794
1795 static int
test_simde_mm_cmpnlt_sd(SIMDE_MUNIT_TEST_ARGS)1796 test_simde_mm_cmpnlt_sd(SIMDE_MUNIT_TEST_ARGS) {
1797 const struct {
1798 simde__m128d a;
1799 simde__m128d b;
1800 simde__m128d r;
1801 } test_vec[8] = {
1802 { simde_mm_set_pd(SIMDE_FLOAT64_C( -713.31), SIMDE_FLOAT64_C( -162.56)),
1803 simde_mm_set_pd(SIMDE_FLOAT64_C( -134.78), SIMDE_FLOAT64_C( -333.93)),
1804 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -713.31), SIMDE_FLOAT64_C( -162.56)), simde_x_mm_setone_pd()) },
1805 { simde_mm_set_pd(SIMDE_FLOAT64_C( 903.93), SIMDE_FLOAT64_C( 249.58)),
1806 simde_mm_set_pd(SIMDE_FLOAT64_C( 300.72), SIMDE_FLOAT64_C( -642.46)),
1807 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 903.93), SIMDE_FLOAT64_C( 249.58)), simde_x_mm_setone_pd()) },
1808 { simde_mm_set_pd(SIMDE_FLOAT64_C( -697.34), SIMDE_FLOAT64_C( 79.67)),
1809 simde_mm_set_pd(SIMDE_FLOAT64_C( -123.52), SIMDE_FLOAT64_C( -418.48)),
1810 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -697.34), SIMDE_FLOAT64_C( 79.67)), simde_x_mm_setone_pd()) },
1811 { simde_mm_set_pd(SIMDE_FLOAT64_C( 90.71), SIMDE_FLOAT64_C( -449.42)),
1812 simde_mm_set_pd(SIMDE_FLOAT64_C( 629.69), SIMDE_FLOAT64_C( 449.98)),
1813 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 90.71), SIMDE_FLOAT64_C( -449.42)), simde_mm_setzero_pd()) },
1814 { simde_mm_set_pd(SIMDE_FLOAT64_C( 751.69), SIMDE_FLOAT64_C( -170.45)),
1815 simde_mm_set_pd(SIMDE_FLOAT64_C( -991.25), SIMDE_FLOAT64_C( 129.62)),
1816 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 751.69), SIMDE_FLOAT64_C( -170.45)), simde_mm_setzero_pd()) },
1817 { simde_mm_set_pd(SIMDE_FLOAT64_C( 961.53), SIMDE_FLOAT64_C( -601.03)),
1818 simde_mm_set_pd(SIMDE_FLOAT64_C( -458.00), SIMDE_FLOAT64_C( -521.61)),
1819 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 961.53), SIMDE_FLOAT64_C( -601.03)), simde_mm_setzero_pd()) },
1820 { simde_mm_set_pd(SIMDE_FLOAT64_C( -697.78), SIMDE_FLOAT64_C( 908.22)),
1821 simde_mm_set_pd(SIMDE_FLOAT64_C( -418.87), SIMDE_FLOAT64_C( 253.38)),
1822 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -697.78), SIMDE_FLOAT64_C( 908.22)), simde_x_mm_setone_pd()) },
1823 { simde_mm_set_pd(SIMDE_FLOAT64_C( 897.84), SIMDE_FLOAT64_C( 98.86)),
1824 simde_mm_set_pd(SIMDE_FLOAT64_C( 743.55), SIMDE_FLOAT64_C( -417.08)),
1825 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 897.84), SIMDE_FLOAT64_C( 98.86)), simde_x_mm_setone_pd()) }
1826 };
1827
1828 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1829 simde__m128d r = simde_mm_cmpnlt_sd(test_vec[i].a, test_vec[i].b);
1830 simde_assert_m128d_equal(r, test_vec[i].r);
1831 }
1832
1833 return 0;
1834 }
1835
1836 static int
test_simde_mm_cmple_pd(SIMDE_MUNIT_TEST_ARGS)1837 test_simde_mm_cmple_pd(SIMDE_MUNIT_TEST_ARGS) {
1838 const struct {
1839 simde__m128d a;
1840 simde__m128d b;
1841 simde__m128i r;
1842 } test_vec[8] = {
1843 { simde_mm_set_pd(SIMDE_FLOAT64_C( 543.54), SIMDE_FLOAT64_C( -463.43)),
1844 simde_mm_set_pd(SIMDE_FLOAT64_C( 803.80), SIMDE_FLOAT64_C( -383.88)),
1845 simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) },
1846 { simde_mm_set_pd(SIMDE_FLOAT64_C( -420.41), SIMDE_FLOAT64_C( 497.43)),
1847 simde_mm_set_pd(SIMDE_FLOAT64_C( -592.95), SIMDE_FLOAT64_C( -224.51)),
1848 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1849 { simde_mm_set_pd(SIMDE_FLOAT64_C( -921.01), SIMDE_FLOAT64_C( -601.69)),
1850 simde_mm_set_pd(SIMDE_FLOAT64_C( -921.01), SIMDE_FLOAT64_C( -730.20)),
1851 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1852 { simde_mm_set_pd(SIMDE_FLOAT64_C( -456.56), SIMDE_FLOAT64_C( 380.21)),
1853 simde_mm_set_pd(SIMDE_FLOAT64_C( -456.56), SIMDE_FLOAT64_C( 380.21)),
1854 simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) },
1855 { simde_mm_set_pd(SIMDE_FLOAT64_C( 317.08), SIMDE_FLOAT64_C( 136.54)),
1856 simde_mm_set_pd(SIMDE_FLOAT64_C( 944.53), SIMDE_FLOAT64_C( 370.42)),
1857 simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) },
1858 { simde_mm_set_pd(SIMDE_FLOAT64_C( -193.09), SIMDE_FLOAT64_C( 515.21)),
1859 simde_mm_set_pd(SIMDE_FLOAT64_C( -63.27), SIMDE_FLOAT64_C( 515.21)),
1860 simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) },
1861 { simde_mm_set_pd(SIMDE_FLOAT64_C( 14.93), SIMDE_FLOAT64_C( 166.91)),
1862 simde_mm_set_pd(SIMDE_FLOAT64_C( 14.93), SIMDE_FLOAT64_C( -633.50)),
1863 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1864 { simde_mm_set_pd(SIMDE_FLOAT64_C( 121.98), SIMDE_FLOAT64_C( -542.50)),
1865 simde_mm_set_pd(SIMDE_FLOAT64_C( 121.98), SIMDE_FLOAT64_C( -244.93)),
1866 simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) }
1867 };
1868
1869 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1870 simde__m128i r = simde_mm_castpd_si128(simde_mm_cmple_pd(test_vec[i].a, test_vec[i].b));
1871 simde_assert_m128i_i64(r, ==, test_vec[i].r);
1872 }
1873
1874 return 0;
1875 }
1876
1877 static int
test_simde_mm_cmple_sd(SIMDE_MUNIT_TEST_ARGS)1878 test_simde_mm_cmple_sd(SIMDE_MUNIT_TEST_ARGS) {
1879 const struct {
1880 simde__m128d a;
1881 simde__m128d b;
1882 simde__m128d r;
1883 } test_vec[8] = {
1884 { simde_mm_set_pd(SIMDE_FLOAT64_C( -6.97), SIMDE_FLOAT64_C( -531.93)),
1885 simde_mm_set_pd(SIMDE_FLOAT64_C( 442.04), SIMDE_FLOAT64_C( 237.56)),
1886 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -6.97), SIMDE_FLOAT64_C( -531.93)), simde_x_mm_setone_pd()) },
1887 { simde_mm_set_pd(SIMDE_FLOAT64_C( 953.55), SIMDE_FLOAT64_C( -668.52)),
1888 simde_mm_set_pd(SIMDE_FLOAT64_C( 75.21), SIMDE_FLOAT64_C( -841.44)),
1889 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 953.55), SIMDE_FLOAT64_C( -668.52)), simde_mm_setzero_pd()) },
1890 { simde_mm_set_pd(SIMDE_FLOAT64_C( -962.44), SIMDE_FLOAT64_C( 733.31)),
1891 simde_mm_set_pd(SIMDE_FLOAT64_C( 366.34), SIMDE_FLOAT64_C( 744.84)),
1892 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -962.44), SIMDE_FLOAT64_C( 733.31)), simde_x_mm_setone_pd()) },
1893 { simde_mm_set_pd(SIMDE_FLOAT64_C( 546.64), SIMDE_FLOAT64_C( 333.17)),
1894 simde_mm_set_pd(SIMDE_FLOAT64_C( 540.77), SIMDE_FLOAT64_C( -0.80)),
1895 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 546.64), SIMDE_FLOAT64_C( 333.17)), simde_mm_setzero_pd()) },
1896 { simde_mm_set_pd(SIMDE_FLOAT64_C( -407.18), SIMDE_FLOAT64_C( -763.20)),
1897 simde_mm_set_pd(SIMDE_FLOAT64_C( 973.34), SIMDE_FLOAT64_C( -496.03)),
1898 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -407.18), SIMDE_FLOAT64_C( -763.20)), simde_x_mm_setone_pd()) },
1899 { simde_mm_set_pd(SIMDE_FLOAT64_C( 407.82), SIMDE_FLOAT64_C( 479.81)),
1900 simde_mm_set_pd(SIMDE_FLOAT64_C( 198.41), SIMDE_FLOAT64_C( 710.05)),
1901 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 407.82), SIMDE_FLOAT64_C( 479.81)), simde_x_mm_setone_pd()) },
1902 { simde_mm_set_pd(SIMDE_FLOAT64_C( -282.72), SIMDE_FLOAT64_C( -348.78)),
1903 simde_mm_set_pd(SIMDE_FLOAT64_C( 165.84), SIMDE_FLOAT64_C( -951.18)),
1904 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -282.72), SIMDE_FLOAT64_C( -348.78)), simde_mm_setzero_pd()) },
1905 { simde_mm_set_pd(SIMDE_FLOAT64_C( 642.13), SIMDE_FLOAT64_C( -574.77)),
1906 simde_mm_set_pd(SIMDE_FLOAT64_C( -633.14), SIMDE_FLOAT64_C( 741.95)),
1907 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 642.13), SIMDE_FLOAT64_C( -574.77)), simde_x_mm_setone_pd()) }
1908 };
1909
1910 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1911 simde__m128d r = simde_mm_cmple_sd(test_vec[i].a, test_vec[i].b);
1912 simde_assert_m128d_equal(r, test_vec[i].r);
1913 }
1914
1915 return 0;
1916 }
1917
1918 static int
test_simde_mm_cmpnle_pd(SIMDE_MUNIT_TEST_ARGS)1919 test_simde_mm_cmpnle_pd(SIMDE_MUNIT_TEST_ARGS) {
1920 const struct {
1921 simde__m128d a;
1922 simde__m128d b;
1923 simde__m128i r;
1924 } test_vec[8] = {
1925 { simde_mm_set_pd(SIMDE_FLOAT64_C( -50.93), SIMDE_FLOAT64_C( -877.25)),
1926 simde_mm_set_pd(SIMDE_FLOAT64_C( -50.93), SIMDE_FLOAT64_C( 61.42)),
1927 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1928 { simde_mm_set_pd(SIMDE_FLOAT64_C( 567.19), SIMDE_FLOAT64_C( 768.82)),
1929 simde_mm_set_pd(SIMDE_FLOAT64_C( -689.51), SIMDE_FLOAT64_C( 768.82)),
1930 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1931 { simde_mm_set_pd(SIMDE_FLOAT64_C( 625.00), SIMDE_FLOAT64_C( 979.36)),
1932 simde_mm_set_pd(SIMDE_FLOAT64_C( 59.83), SIMDE_FLOAT64_C( 979.36)),
1933 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1934 { simde_mm_set_pd(SIMDE_FLOAT64_C( -994.58), SIMDE_FLOAT64_C( 130.45)),
1935 simde_mm_set_pd(SIMDE_FLOAT64_C( -720.49), SIMDE_FLOAT64_C( 130.45)),
1936 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1937 { simde_mm_set_pd(SIMDE_FLOAT64_C( 889.79), SIMDE_FLOAT64_C( -677.25)),
1938 simde_mm_set_pd(SIMDE_FLOAT64_C( 889.79), SIMDE_FLOAT64_C( -677.25)),
1939 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1940 { simde_mm_set_pd(SIMDE_FLOAT64_C( 802.11), SIMDE_FLOAT64_C( -926.46)),
1941 simde_mm_set_pd(SIMDE_FLOAT64_C( -136.48), SIMDE_FLOAT64_C( -926.46)),
1942 simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1943 { simde_mm_set_pd(SIMDE_FLOAT64_C( -579.79), SIMDE_FLOAT64_C( 368.31)),
1944 simde_mm_set_pd(SIMDE_FLOAT64_C( -579.79), SIMDE_FLOAT64_C( -736.86)),
1945 simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) },
1946 { simde_mm_set_pd(SIMDE_FLOAT64_C( 408.86), SIMDE_FLOAT64_C( 63.85)),
1947 simde_mm_set_pd(SIMDE_FLOAT64_C( 408.86), SIMDE_FLOAT64_C( 878.02)),
1948 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }
1949 };
1950
1951 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1952 simde__m128i r = simde_mm_castpd_si128(simde_mm_cmpnle_pd(test_vec[i].a, test_vec[i].b));
1953 simde_assert_m128i_i64(r, ==, test_vec[i].r);
1954 }
1955
1956 return 0;
1957 }
1958
1959 static int
test_simde_mm_cmpnle_sd(SIMDE_MUNIT_TEST_ARGS)1960 test_simde_mm_cmpnle_sd(SIMDE_MUNIT_TEST_ARGS) {
1961 const struct {
1962 simde__m128d a;
1963 simde__m128d b;
1964 simde__m128d r;
1965 } test_vec[8] = {
1966 { simde_mm_set_pd(SIMDE_FLOAT64_C( 863.30), SIMDE_FLOAT64_C( 817.71)),
1967 simde_mm_set_pd(SIMDE_FLOAT64_C( 465.11), SIMDE_FLOAT64_C( 402.99)),
1968 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 863.30), SIMDE_FLOAT64_C( 817.71)), simde_x_mm_setone_pd()) },
1969 { simde_mm_set_pd(SIMDE_FLOAT64_C( 189.40), SIMDE_FLOAT64_C( -607.91)),
1970 simde_mm_set_pd(SIMDE_FLOAT64_C( -476.72), SIMDE_FLOAT64_C( -670.93)),
1971 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 189.40), SIMDE_FLOAT64_C( -607.91)), simde_x_mm_setone_pd()) },
1972 { simde_mm_set_pd(SIMDE_FLOAT64_C( -333.27), SIMDE_FLOAT64_C( 662.88)),
1973 simde_mm_set_pd(SIMDE_FLOAT64_C( 741.44), SIMDE_FLOAT64_C( -212.71)),
1974 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -333.27), SIMDE_FLOAT64_C( 662.88)), simde_x_mm_setone_pd()) },
1975 { simde_mm_set_pd(SIMDE_FLOAT64_C( 426.15), SIMDE_FLOAT64_C( -964.01)),
1976 simde_mm_set_pd(SIMDE_FLOAT64_C( 54.04), SIMDE_FLOAT64_C( 321.51)),
1977 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 426.15), SIMDE_FLOAT64_C( -964.01)), simde_mm_setzero_pd()) },
1978 { simde_mm_set_pd(SIMDE_FLOAT64_C( -797.48), SIMDE_FLOAT64_C( 851.48)),
1979 simde_mm_set_pd(SIMDE_FLOAT64_C( 907.15), SIMDE_FLOAT64_C( 638.76)),
1980 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -797.48), SIMDE_FLOAT64_C( 851.48)), simde_x_mm_setone_pd()) },
1981 { simde_mm_set_pd(SIMDE_FLOAT64_C( 439.23), SIMDE_FLOAT64_C( 238.01)),
1982 simde_mm_set_pd(SIMDE_FLOAT64_C( -23.09), SIMDE_FLOAT64_C( 160.20)),
1983 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 439.23), SIMDE_FLOAT64_C( 238.01)), simde_x_mm_setone_pd()) },
1984 { simde_mm_set_pd(SIMDE_FLOAT64_C( 537.28), SIMDE_FLOAT64_C( 982.90)),
1985 simde_mm_set_pd(SIMDE_FLOAT64_C( 303.40), SIMDE_FLOAT64_C( 928.78)),
1986 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 537.28), SIMDE_FLOAT64_C( 982.90)), simde_x_mm_setone_pd()) },
1987 { simde_mm_set_pd(SIMDE_FLOAT64_C( -169.84), SIMDE_FLOAT64_C( -696.10)),
1988 simde_mm_set_pd(SIMDE_FLOAT64_C( -302.24), SIMDE_FLOAT64_C( -382.83)),
1989 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -169.84), SIMDE_FLOAT64_C( -696.10)), simde_mm_setzero_pd()) }
1990 };
1991
1992 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1993 simde__m128d r = simde_mm_cmpnle_sd(test_vec[i].a, test_vec[i].b);
1994 simde_assert_m128d_equal(r, test_vec[i].r);
1995 }
1996
1997 return 0;
1998 }
1999
2000 static int
test_simde_mm_cmpgt_epi8(SIMDE_MUNIT_TEST_ARGS)2001 test_simde_mm_cmpgt_epi8(SIMDE_MUNIT_TEST_ARGS) {
2002 const struct {
2003 simde__m128i a;
2004 simde__m128i b;
2005 simde__m128i r;
2006 } test_vec[8] = {
2007 { simde_mm_set_epi8(INT8_C( 13), INT8_C( -99), INT8_C(-128), INT8_C( 91), INT8_C( -96), INT8_C( 103), INT8_C(-104), INT8_C(-110),
2008 INT8_C( -46), INT8_C( -5), INT8_C( 62), INT8_C(-125), INT8_C( -51), INT8_C( -65), INT8_C(-102), INT8_C( -14)),
2009 simde_mm_set_epi8(INT8_C( 10), INT8_C( -84), INT8_C( 90), INT8_C(-110), INT8_C( 113), INT8_C( -34), INT8_C( -75), INT8_C(-110),
2010 INT8_C( -79), INT8_C(-114), INT8_C( 26), INT8_C(-127), INT8_C( -5), INT8_C( -9), INT8_C(-102), INT8_C( -38)),
2011 simde_mm_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0),
2012 INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) },
2013 { simde_mm_set_epi8(INT8_C(-110), INT8_C( -93), INT8_C( 106), INT8_C( -55), INT8_C( 91), INT8_C( -78), INT8_C( 69), INT8_C( 62),
2014 INT8_C( 38), INT8_C(-101), INT8_C( 86), INT8_C(-107), INT8_C( 114), INT8_C( 120), INT8_C(-118), INT8_C( 101)),
2015 simde_mm_set_epi8(INT8_C( 58), INT8_C( -88), INT8_C( 75), INT8_C( -55), INT8_C( 92), INT8_C( 51), INT8_C(-109), INT8_C( 62),
2016 INT8_C( 123), INT8_C( -42), INT8_C( 0), INT8_C( 40), INT8_C( 114), INT8_C(-115), INT8_C( 34), INT8_C( 101)),
2017 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0),
2018 INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0)) },
2019 { simde_mm_set_epi8(INT8_C( 79), INT8_C( -2), INT8_C(-126), INT8_C(-121), INT8_C( 71), INT8_C( -59), INT8_C( 95), INT8_C( 38),
2020 INT8_C( -95), INT8_C( 103), INT8_C( -55), INT8_C( -42), INT8_C(-124), INT8_C( -82), INT8_C( 102), INT8_C( 97)),
2021 simde_mm_set_epi8(INT8_C( -39), INT8_C( -59), INT8_C(-126), INT8_C(-107), INT8_C(-111), INT8_C( 122), INT8_C( -55), INT8_C( 87),
2022 INT8_C( -95), INT8_C( -99), INT8_C( 56), INT8_C( 120), INT8_C( 107), INT8_C( -79), INT8_C( -9), INT8_C( -36)),
2023 simde_mm_set_epi8(INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0),
2024 INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1)) },
2025 { simde_mm_set_epi8(INT8_C( -68), INT8_C( 7), INT8_C( -32), INT8_C( 120), INT8_C(-106), INT8_C(-127), INT8_C( 37), INT8_C( 95),
2026 INT8_C( -77), INT8_C(-126), INT8_C(-111), INT8_C( -96), INT8_C( 67), INT8_C( 43), INT8_C(-123), INT8_C( 21)),
2027 simde_mm_set_epi8(INT8_C( 72), INT8_C( 68), INT8_C( 76), INT8_C( -22), INT8_C( -11), INT8_C( 34), INT8_C( 112), INT8_C( 95),
2028 INT8_C( -77), INT8_C( 36), INT8_C( 119), INT8_C( -59), INT8_C( -49), INT8_C( -22), INT8_C(-125), INT8_C( 21)),
2029 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
2030 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( -1), INT8_C( 0)) },
2031 { simde_mm_set_epi8(INT8_C( -18), INT8_C( 13), INT8_C( 66), INT8_C( -52), INT8_C( -92), INT8_C( 28), INT8_C(-122), INT8_C( -12),
2032 INT8_C( -60), INT8_C( 125), INT8_C(-104), INT8_C(-118), INT8_C( -76), INT8_C( 42), INT8_C( -48), INT8_C(-120)),
2033 simde_mm_set_epi8(INT8_C( -17), INT8_C( 13), INT8_C( 66), INT8_C( -64), INT8_C( -92), INT8_C( 114), INT8_C(-119), INT8_C(-106),
2034 INT8_C( 78), INT8_C(-125), INT8_C( 88), INT8_C( -88), INT8_C( 101), INT8_C( 42), INT8_C( -58), INT8_C( -8)),
2035 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1),
2036 INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0)) },
2037 { simde_mm_set_epi8(INT8_C(-112), INT8_C( -16), INT8_C( 96), INT8_C( -64), INT8_C( 75), INT8_C( 56), INT8_C( -96), INT8_C( 96),
2038 INT8_C( -8), INT8_C( 16), INT8_C( 95), INT8_C( 41), INT8_C( 62), INT8_C( -2), INT8_C(-105), INT8_C(-101)),
2039 simde_mm_set_epi8(INT8_C( 42), INT8_C( 7), INT8_C( 90), INT8_C( -93), INT8_C( 75), INT8_C( 14), INT8_C( -5), INT8_C( 61),
2040 INT8_C( -8), INT8_C( -49), INT8_C( 95), INT8_C( 82), INT8_C( -93), INT8_C( -80), INT8_C( 6), INT8_C( -48)),
2041 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1),
2042 INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0)) },
2043 { simde_mm_set_epi8(INT8_C( -56), INT8_C( -85), INT8_C( 9), INT8_C( 3), INT8_C( 32), INT8_C(-105), INT8_C( 93), INT8_C( -78),
2044 INT8_C(-113), INT8_C( 96), INT8_C( 61), INT8_C( 14), INT8_C( -92), INT8_C( 53), INT8_C( 51), INT8_C( -7)),
2045 simde_mm_set_epi8(INT8_C( 15), INT8_C( 100), INT8_C( 9), INT8_C( 70), INT8_C(-115), INT8_C(-105), INT8_C( 14), INT8_C( -41),
2046 INT8_C(-113), INT8_C( -54), INT8_C( -38), INT8_C( 14), INT8_C( -53), INT8_C( 5), INT8_C(-127), INT8_C( -7)),
2047 simde_mm_set_epi8(INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( 0),
2048 INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0)) },
2049 { simde_mm_set_epi8(INT8_C( 120), INT8_C( 38), INT8_C( 44), INT8_C( 103), INT8_C( 33), INT8_C( -93), INT8_C(-102), INT8_C( -46),
2050 INT8_C( 47), INT8_C( 7), INT8_C( 120), INT8_C( 102), INT8_C( -87), INT8_C( -84), INT8_C( 92), INT8_C( 87)),
2051 simde_mm_set_epi8(INT8_C( -11), INT8_C( 89), INT8_C( 26), INT8_C( 69), INT8_C( 108), INT8_C( 127), INT8_C(-102), INT8_C( 49),
2052 INT8_C( 53), INT8_C( 57), INT8_C( 120), INT8_C( -23), INT8_C( -87), INT8_C( -84), INT8_C( 113), INT8_C( -36)),
2053 simde_mm_set_epi8(INT8_C( -1), INT8_C( 0), INT8_C( -1), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( 0),
2054 INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1), INT8_C( 0), INT8_C( 0), INT8_C( 0), INT8_C( -1)) }
2055 };
2056
2057 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
2058 simde__m128i r = simde_mm_cmpgt_epi8(test_vec[i].a, test_vec[i].b);
2059 simde_assert_m128i_i8(r, ==, test_vec[i].r);
2060 }
2061
2062 return 0;
2063 }
2064
2065 static int
test_simde_mm_cmpgt_epi16(SIMDE_MUNIT_TEST_ARGS)2066 test_simde_mm_cmpgt_epi16(SIMDE_MUNIT_TEST_ARGS) {
2067 const struct {
2068 simde__m128i a;
2069 simde__m128i b;
2070 simde__m128i r;
2071 } test_vec[8] = {
2072 { simde_mm_set_epi16(INT16_C( 11481), INT16_C(-31028), INT16_C(-28938), INT16_C( 3434),
2073 INT16_C( 2523), INT16_C(-16298), INT16_C(-20752), INT16_C( -3418)),
2074 simde_mm_set_epi16(INT16_C( 11481), INT16_C(-30562), INT16_C( 4762), INT16_C( -6519),
2075 INT16_C( 2523), INT16_C( 9845), INT16_C( -18), INT16_C( -5787)),
2076 simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1),
2077 INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) },
2078 { simde_mm_set_epi16(INT16_C( -3487), INT16_C( -2281), INT16_C( 2722), INT16_C(-23699),
2079 INT16_C( -5087), INT16_C( 24907), INT16_C( 26126), INT16_C( 26357)),
2080 simde_mm_set_epi16(INT16_C( 32178), INT16_C(-24562), INT16_C( -3261), INT16_C(-23699),
2081 INT16_C( 2431), INT16_C(-16600), INT16_C( -5679), INT16_C(-12625)),
2082 simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0),
2083 INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)) },
2084 { simde_mm_set_epi16(INT16_C( 28908), INT16_C( -8639), INT16_C(-27999), INT16_C(-19726),
2085 INT16_C( 28446), INT16_C( -947), INT16_C( -9756), INT16_C(-32088)),
2086 simde_mm_set_epi16(INT16_C(-24056), INT16_C(-13026), INT16_C(-27999), INT16_C( 27584),
2087 INT16_C(-22292), INT16_C( 18403), INT16_C(-15329), INT16_C( 30515)),
2088 simde_mm_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0),
2089 INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( 0)) },
2090 { simde_mm_set_epi16(INT16_C( 32500), INT16_C( 28770), INT16_C(-12789), INT16_C( 764),
2091 INT16_C(-17186), INT16_C( 5823), INT16_C( 5923), INT16_C(-14898)),
2092 simde_mm_set_epi16(INT16_C( 5264), INT16_C(-27897), INT16_C(-22472), INT16_C(-17764),
2093 INT16_C( 20191), INT16_C( 20077), INT16_C(-20539), INT16_C( -7345)),
2094 simde_mm_set_epi16(INT16_C( -1), INT16_C( -1), INT16_C( -1), INT16_C( -1),
2095 INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0)) },
2096 { simde_mm_set_epi16(INT16_C(-32420), INT16_C(-10018), INT16_C( 10034), INT16_C( 21195),
2097 INT16_C( 23576), INT16_C( 23578), INT16_C( 27261), INT16_C( 22728)),
2098 simde_mm_set_epi16(INT16_C(-22785), INT16_C( 9581), INT16_C( -7653), INT16_C(-22519),
2099 INT16_C( 2089), INT16_C( 10927), INT16_C( 31136), INT16_C( 28081)),
2100 simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1),
2101 INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0)) },
2102 { simde_mm_set_epi16(INT16_C( -8613), INT16_C( 14814), INT16_C( 25977), INT16_C(-32026),
2103 INT16_C(-14164), INT16_C( 15788), INT16_C( 26276), INT16_C(-23351)),
2104 simde_mm_set_epi16(INT16_C( 18907), INT16_C( 31050), INT16_C( 25483), INT16_C( -1544),
2105 INT16_C(-22377), INT16_C(-30002), INT16_C( 26276), INT16_C(-21368)),
2106 simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( 0),
2107 INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0)) },
2108 { simde_mm_set_epi16(INT16_C( -8616), INT16_C( 18300), INT16_C(-13448), INT16_C(-25384),
2109 INT16_C(-20778), INT16_C( 9404), INT16_C( 18457), INT16_C(-13013)),
2110 simde_mm_set_epi16(INT16_C( 28965), INT16_C(-22807), INT16_C( 20081), INT16_C(-25384),
2111 INT16_C( 21664), INT16_C(-19420), INT16_C(-10494), INT16_C( 8092)),
2112 simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0),
2113 INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)) },
2114 { simde_mm_set_epi16(INT16_C(-19643), INT16_C( 19578), INT16_C(-31344), INT16_C(-10120),
2115 INT16_C( -1042), INT16_C( 26214), INT16_C( 7476), INT16_C( 19171)),
2116 simde_mm_set_epi16(INT16_C( 3338), INT16_C(-31811), INT16_C( 23264), INT16_C( 16135),
2117 INT16_C( 10963), INT16_C( 28585), INT16_C( 10267), INT16_C( 15982)),
2118 simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0),
2119 INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) }
2120 };
2121
2122 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2123 simde__m128i r = simde_mm_cmpgt_epi16(test_vec[i].a, test_vec[i].b);
2124 simde_assert_m128i_i16(r, ==, test_vec[i].r);
2125 }
2126
2127 return 0;
2128 }
2129
2130 static int
test_simde_mm_cmpgt_epi32(SIMDE_MUNIT_TEST_ARGS)2131 test_simde_mm_cmpgt_epi32(SIMDE_MUNIT_TEST_ARGS) {
2132 const struct {
2133 simde__m128i a;
2134 simde__m128i b;
2135 simde__m128i r;
2136 } test_vec[8] = {
2137 { simde_mm_set_epi32( 752453324, -1896477334, 165396566, -1359940954),
2138 simde_mm_set_epi32( 752453324, 312141449, -1431099787, -1119899),
2139 simde_mm_set_epi32( 0, 0, -1, 0) },
2140 { simde_mm_set_epi32( 107153560, 1681238316, -2021152487, -1327623679),
2141 simde_mm_set_epi32( -228460777, 178430829, -333356725, 1712219893),
2142 simde_mm_set_epi32( -1, -1, 0, 0) },
2143 { simde_mm_set_epi32( -899341348, -1183976764, 50756911, -774436817),
2144 simde_mm_set_epi32( -899341348, -1675909702, 50756911, 393145285),
2145 simde_mm_set_epi32( 0, -1, 0, 0) },
2146 { simde_mm_set_epi32(-1576481506, 693332928, -1460910109, -1004570829),
2147 simde_mm_set_epi32(-1038801032, -1159952439, -1460910109, -43665635),
2148 simde_mm_set_epi32( 0, -1, 0, 0) },
2149 { simde_mm_set_epi32( 2129948770, -838139140, -1126295873, 388220366),
2150 simde_mm_set_epi32( 345019143, -1472677220, 1323257453, -1345985713),
2151 simde_mm_set_epi32( -1, -1, 0, -1) },
2152 { simde_mm_set_epi32( 324758156, 1228690576, -1773311089, 254589418),
2153 simde_mm_set_epi32(-2124621602, 1228690576, 1545100314, 1786599624),
2154 simde_mm_set_epi32( -1, 0, 0, 0) },
2155 { simde_mm_set_epi32(-1939857174, 351576089, 62939556, -1061610170),
2156 simde_mm_set_epi32(-1899113305, 1851167226, 62939556, -2109881445),
2157 simde_mm_set_epi32( 0, 0, 0, -1) },
2158 { simde_mm_set_epi32( 1239120202, 1670117880, -1466463538, 1932307592),
2159 simde_mm_set_epi32( 1694384857, 79202881, -114087446, -617386644),
2160 simde_mm_set_epi32( 0, -1, 0, -1) }
2161 };
2162
2163 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2164 simde__m128i r = simde_mm_cmpgt_epi32(test_vec[i].a, test_vec[i].b);
2165 simde_assert_m128i_i32(r, ==, test_vec[i].r);
2166 }
2167
2168 return 0;
2169 }
2170
2171 static int
test_simde_mm_cmpgt_pd(SIMDE_MUNIT_TEST_ARGS)2172 test_simde_mm_cmpgt_pd(SIMDE_MUNIT_TEST_ARGS) {
2173 const struct {
2174 simde__m128d a;
2175 simde__m128d b;
2176 simde__m128d r;
2177 } test_vec[8] = {
2178 { simde_mm_set_pd(SIMDE_FLOAT64_C( -649.61), SIMDE_FLOAT64_C( 366.73)),
2179 simde_mm_set_pd(SIMDE_FLOAT64_C( 333.59), SIMDE_FLOAT64_C( 116.88)),
2180 simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2181 { simde_mm_set_pd(SIMDE_FLOAT64_C( -619.22), SIMDE_FLOAT64_C( -854.65)),
2182 simde_mm_set_pd(SIMDE_FLOAT64_C( -854.79), SIMDE_FLOAT64_C( 863.33)),
2183 simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2184 { simde_mm_set_pd(SIMDE_FLOAT64_C( -950.10), SIMDE_FLOAT64_C( 381.78)),
2185 simde_mm_set_pd(SIMDE_FLOAT64_C( 844.77), SIMDE_FLOAT64_C( -217.11)),
2186 simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2187 { simde_mm_set_pd(SIMDE_FLOAT64_C( -925.79), SIMDE_FLOAT64_C( -916.91)),
2188 simde_mm_set_pd(SIMDE_FLOAT64_C( -17.99), SIMDE_FLOAT64_C( 826.72)),
2189 simde_mm_setzero_pd() },
2190 { simde_mm_set_pd(SIMDE_FLOAT64_C( 581.21), SIMDE_FLOAT64_C( 639.37)),
2191 simde_mm_set_pd(SIMDE_FLOAT64_C( 581.21), SIMDE_FLOAT64_C( 448.67)),
2192 simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2193 { simde_mm_set_pd(SIMDE_FLOAT64_C( 702.29), SIMDE_FLOAT64_C( -582.84)),
2194 simde_mm_set_pd(SIMDE_FLOAT64_C( 702.29), SIMDE_FLOAT64_C( 186.24)),
2195 simde_mm_setzero_pd() },
2196 { simde_mm_set_pd(SIMDE_FLOAT64_C( 532.21), SIMDE_FLOAT64_C( 145.56)),
2197 simde_mm_set_pd(SIMDE_FLOAT64_C( -677.14), SIMDE_FLOAT64_C( 145.56)),
2198 simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2199 { simde_mm_set_pd(SIMDE_FLOAT64_C( 459.86), SIMDE_FLOAT64_C( 265.89)),
2200 simde_mm_set_pd(SIMDE_FLOAT64_C( -130.43), SIMDE_FLOAT64_C( 334.48)),
2201 simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }
2202 };
2203
2204 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2205 simde__m128d r = simde_mm_cmpgt_pd(test_vec[i].a, test_vec[i].b);
2206 simde_assert_m128d_equal(r, test_vec[i].r);
2207 }
2208
2209 return 0;
2210 }
2211
2212 static int
test_simde_mm_cmpgt_sd(SIMDE_MUNIT_TEST_ARGS)2213 test_simde_mm_cmpgt_sd(SIMDE_MUNIT_TEST_ARGS) {
2214 const struct {
2215 simde__m128d a;
2216 simde__m128d b;
2217 simde__m128d r;
2218 } test_vec[8] = {
2219 { simde_mm_set_pd(SIMDE_FLOAT64_C( 482.46), SIMDE_FLOAT64_C( 39.32)),
2220 simde_mm_set_pd(SIMDE_FLOAT64_C( 175.75), SIMDE_FLOAT64_C( -451.08)),
2221 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 482.46), SIMDE_FLOAT64_C( 39.32)), simde_x_mm_setone_pd()) },
2222 { simde_mm_set_pd(SIMDE_FLOAT64_C( 850.79), SIMDE_FLOAT64_C( 999.92)),
2223 simde_mm_set_pd(SIMDE_FLOAT64_C( -978.35), SIMDE_FLOAT64_C( 216.37)),
2224 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 850.79), SIMDE_FLOAT64_C( 999.92)), simde_x_mm_setone_pd()) },
2225 { simde_mm_set_pd(SIMDE_FLOAT64_C( -218.27), SIMDE_FLOAT64_C( 952.36)),
2226 simde_mm_set_pd(SIMDE_FLOAT64_C( -402.87), SIMDE_FLOAT64_C( -852.22)),
2227 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -218.27), SIMDE_FLOAT64_C( 952.36)), simde_x_mm_setone_pd()) },
2228 { simde_mm_set_pd(SIMDE_FLOAT64_C( -324.97), SIMDE_FLOAT64_C( -18.67)),
2229 simde_mm_set_pd(SIMDE_FLOAT64_C( -602.36), SIMDE_FLOAT64_C( 488.60)),
2230 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -324.97), SIMDE_FLOAT64_C( -18.67)), simde_mm_setzero_pd()) },
2231 { simde_mm_set_pd(SIMDE_FLOAT64_C( -224.88), SIMDE_FLOAT64_C( 278.88)),
2232 simde_mm_set_pd(SIMDE_FLOAT64_C( 861.73), SIMDE_FLOAT64_C( -326.54)),
2233 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -224.88), SIMDE_FLOAT64_C( 278.88)), simde_x_mm_setone_pd()) },
2234 { simde_mm_set_pd(SIMDE_FLOAT64_C( -160.74), SIMDE_FLOAT64_C( 611.30)),
2235 simde_mm_set_pd(SIMDE_FLOAT64_C( 370.13), SIMDE_FLOAT64_C( 18.16)),
2236 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -160.74), SIMDE_FLOAT64_C( 611.30)), simde_x_mm_setone_pd()) },
2237 { simde_mm_set_pd(SIMDE_FLOAT64_C( 914.20), SIMDE_FLOAT64_C( 278.69)),
2238 simde_mm_set_pd(SIMDE_FLOAT64_C( 703.64), SIMDE_FLOAT64_C( -975.84)),
2239 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 914.20), SIMDE_FLOAT64_C( 278.69)), simde_x_mm_setone_pd()) },
2240 { simde_mm_set_pd(SIMDE_FLOAT64_C( 392.89), SIMDE_FLOAT64_C( 45.41)),
2241 simde_mm_set_pd(SIMDE_FLOAT64_C( 713.78), SIMDE_FLOAT64_C( -6.71)),
2242 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 392.89), SIMDE_FLOAT64_C( 45.41)), simde_x_mm_setone_pd()) }
2243 };
2244
2245 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2246 simde__m128d r = simde_mm_cmpgt_sd(test_vec[i].a, test_vec[i].b);
2247 simde_assert_m128d_equal(r, test_vec[i].r);
2248 }
2249
2250 return 0;
2251 }
2252
2253 static int
test_simde_mm_cmpngt_pd(SIMDE_MUNIT_TEST_ARGS)2254 test_simde_mm_cmpngt_pd(SIMDE_MUNIT_TEST_ARGS) {
2255 const struct {
2256 simde__m128d a;
2257 simde__m128d b;
2258 simde__m128d r;
2259 } test_vec[8] = {
2260 { simde_mm_set_pd(SIMDE_FLOAT64_C( 65.48), SIMDE_FLOAT64_C( -195.60)),
2261 simde_mm_set_pd(SIMDE_FLOAT64_C( 65.48), SIMDE_FLOAT64_C( 18.27)),
2262 simde_mm_move_sd(simde_x_mm_setone_pd(), simde_x_mm_setone_pd()) },
2263 { simde_mm_set_pd(SIMDE_FLOAT64_C( -563.65), SIMDE_FLOAT64_C( 884.03)),
2264 simde_mm_set_pd(SIMDE_FLOAT64_C( 467.71), SIMDE_FLOAT64_C( -906.63)),
2265 simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2266 { simde_mm_set_pd(SIMDE_FLOAT64_C( -553.85), SIMDE_FLOAT64_C( 49.09)),
2267 simde_mm_set_pd(SIMDE_FLOAT64_C( 731.88), SIMDE_FLOAT64_C( 974.91)),
2268 simde_x_mm_setone_pd() },
2269 { simde_mm_set_pd(SIMDE_FLOAT64_C( 492.98), SIMDE_FLOAT64_C( 64.21)),
2270 simde_mm_set_pd(SIMDE_FLOAT64_C( -392.36), SIMDE_FLOAT64_C( -188.43)),
2271 simde_mm_setzero_pd() },
2272 { simde_mm_set_pd(SIMDE_FLOAT64_C( -248.75), SIMDE_FLOAT64_C( -404.39)),
2273 simde_mm_set_pd(SIMDE_FLOAT64_C( -495.92), SIMDE_FLOAT64_C( -819.81)),
2274 simde_mm_setzero_pd() },
2275 { simde_mm_set_pd(SIMDE_FLOAT64_C( -932.57), SIMDE_FLOAT64_C( 741.27)),
2276 simde_mm_set_pd(SIMDE_FLOAT64_C( -307.42), SIMDE_FLOAT64_C( 170.69)),
2277 simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2278 { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.12), SIMDE_FLOAT64_C( -161.45)),
2279 simde_mm_set_pd(SIMDE_FLOAT64_C( -939.12), SIMDE_FLOAT64_C( -161.45)),
2280 simde_x_mm_setone_pd() },
2281 { simde_mm_set_pd(SIMDE_FLOAT64_C( -900.20), SIMDE_FLOAT64_C( -314.63)),
2282 simde_mm_set_pd(SIMDE_FLOAT64_C( 138.12), SIMDE_FLOAT64_C( 517.19)),
2283 simde_x_mm_setone_pd() }
2284 };
2285
2286 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2287 simde__m128d r = simde_mm_cmpngt_pd(test_vec[i].a, test_vec[i].b);
2288 simde_assert_m128d_equal(r, test_vec[i].r);
2289 }
2290
2291 return 0;
2292 }
2293
2294 static int
test_simde_mm_cmpngt_sd(SIMDE_MUNIT_TEST_ARGS)2295 test_simde_mm_cmpngt_sd(SIMDE_MUNIT_TEST_ARGS) {
2296 const struct {
2297 simde__m128d a;
2298 simde__m128d b;
2299 simde__m128d r;
2300 } test_vec[8] = {
2301 { simde_mm_set_pd(SIMDE_FLOAT64_C( -695.66), SIMDE_FLOAT64_C( 678.35)),
2302 simde_mm_set_pd(SIMDE_FLOAT64_C( 356.43), SIMDE_FLOAT64_C( 495.31)),
2303 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -695.66), SIMDE_FLOAT64_C( 678.35)), simde_mm_setzero_pd()) },
2304 { simde_mm_set_pd(SIMDE_FLOAT64_C( -72.63), SIMDE_FLOAT64_C( 895.56)),
2305 simde_mm_set_pd(SIMDE_FLOAT64_C( -885.88), SIMDE_FLOAT64_C( 947.04)),
2306 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -72.63), SIMDE_FLOAT64_C( 895.56)), simde_x_mm_setone_pd()) },
2307 { simde_mm_set_pd(SIMDE_FLOAT64_C( 72.92), SIMDE_FLOAT64_C( -711.12)),
2308 simde_mm_set_pd(SIMDE_FLOAT64_C( -242.49), SIMDE_FLOAT64_C( -686.51)),
2309 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 72.92), SIMDE_FLOAT64_C( -711.12)), simde_x_mm_setone_pd()) },
2310 { simde_mm_set_pd(SIMDE_FLOAT64_C( 520.17), SIMDE_FLOAT64_C( 176.32)),
2311 simde_mm_set_pd(SIMDE_FLOAT64_C( -442.78), SIMDE_FLOAT64_C( -956.19)),
2312 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 520.17), SIMDE_FLOAT64_C( 176.32)), simde_mm_setzero_pd()) },
2313 { simde_mm_set_pd(SIMDE_FLOAT64_C( 949.11), SIMDE_FLOAT64_C( 112.35)),
2314 simde_mm_set_pd(SIMDE_FLOAT64_C( -212.07), SIMDE_FLOAT64_C( 851.84)),
2315 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 949.11), SIMDE_FLOAT64_C( 112.35)), simde_x_mm_setone_pd()) },
2316 { simde_mm_set_pd(SIMDE_FLOAT64_C( -142.01), SIMDE_FLOAT64_C( -216.70)),
2317 simde_mm_set_pd(SIMDE_FLOAT64_C( -467.63), SIMDE_FLOAT64_C( 481.36)),
2318 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -142.01), SIMDE_FLOAT64_C( -216.70)), simde_x_mm_setone_pd()) },
2319 { simde_mm_set_pd(SIMDE_FLOAT64_C( 918.90), SIMDE_FLOAT64_C( 481.59)),
2320 simde_mm_set_pd(SIMDE_FLOAT64_C( -147.11), SIMDE_FLOAT64_C( 677.03)),
2321 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 918.90), SIMDE_FLOAT64_C( 481.59)), simde_x_mm_setone_pd()) },
2322 { simde_mm_set_pd(SIMDE_FLOAT64_C( -143.49), SIMDE_FLOAT64_C( 447.22)),
2323 simde_mm_set_pd(SIMDE_FLOAT64_C( 50.06), SIMDE_FLOAT64_C( 827.25)),
2324 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -143.49), SIMDE_FLOAT64_C( 447.22)), simde_x_mm_setone_pd()) }
2325 };
2326
2327 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2328 simde__m128d r = simde_mm_cmpngt_sd(test_vec[i].a, test_vec[i].b);
2329 simde_assert_m128d_equal(r, test_vec[i].r);
2330 }
2331
2332 return 0;
2333 }
2334
2335 static int
test_simde_mm_cmpge_pd(SIMDE_MUNIT_TEST_ARGS)2336 test_simde_mm_cmpge_pd(SIMDE_MUNIT_TEST_ARGS) {
2337 const struct {
2338 simde__m128d a;
2339 simde__m128d b;
2340 simde__m128d r;
2341 } test_vec[8] = {
2342 { simde_mm_set_pd(SIMDE_FLOAT64_C( -649.61), SIMDE_FLOAT64_C( 366.73)),
2343 simde_mm_set_pd(SIMDE_FLOAT64_C( 333.59), SIMDE_FLOAT64_C( 116.88)),
2344 simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2345 { simde_mm_set_pd(SIMDE_FLOAT64_C( -619.22), SIMDE_FLOAT64_C( -854.65)),
2346 simde_mm_set_pd(SIMDE_FLOAT64_C( -854.79), SIMDE_FLOAT64_C( 863.33)),
2347 simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2348 { simde_mm_set_pd(SIMDE_FLOAT64_C( -950.10), SIMDE_FLOAT64_C( 381.78)),
2349 simde_mm_set_pd(SIMDE_FLOAT64_C( 844.77), SIMDE_FLOAT64_C( -217.11)),
2350 simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2351 { simde_mm_set_pd(SIMDE_FLOAT64_C( -925.79), SIMDE_FLOAT64_C( -916.91)),
2352 simde_mm_set_pd(SIMDE_FLOAT64_C( -17.99), SIMDE_FLOAT64_C( 826.72)),
2353 simde_mm_setzero_pd() },
2354 { simde_mm_set_pd(SIMDE_FLOAT64_C( 581.21), SIMDE_FLOAT64_C( 639.37)),
2355 simde_mm_set_pd(SIMDE_FLOAT64_C( 581.21), SIMDE_FLOAT64_C( 448.67)),
2356 simde_x_mm_setone_pd() },
2357 { simde_mm_set_pd(SIMDE_FLOAT64_C( 702.29), SIMDE_FLOAT64_C( -582.84)),
2358 simde_mm_set_pd(SIMDE_FLOAT64_C( 702.29), SIMDE_FLOAT64_C( 186.24)),
2359 simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2360 { simde_mm_set_pd(SIMDE_FLOAT64_C( 532.21), SIMDE_FLOAT64_C( 145.56)),
2361 simde_mm_set_pd(SIMDE_FLOAT64_C( -677.14), SIMDE_FLOAT64_C( 145.56)),
2362 simde_x_mm_setone_pd() },
2363 { simde_mm_set_pd(SIMDE_FLOAT64_C( 459.86), SIMDE_FLOAT64_C( 265.89)),
2364 simde_mm_set_pd(SIMDE_FLOAT64_C( -130.43), SIMDE_FLOAT64_C( 334.48)),
2365 simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }
2366 };
2367
2368 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2369 simde__m128d r = simde_mm_cmpge_pd(test_vec[i].a, test_vec[i].b);
2370 simde_assert_m128d_equal(r, test_vec[i].r);
2371 }
2372
2373 return 0;
2374 }
2375
2376 static int
test_simde_mm_cmpge_sd(SIMDE_MUNIT_TEST_ARGS)2377 test_simde_mm_cmpge_sd(SIMDE_MUNIT_TEST_ARGS) {
2378 const struct {
2379 simde__m128d a;
2380 simde__m128d b;
2381 simde__m128d r;
2382 } test_vec[8] = {
2383 { simde_mm_set_pd(SIMDE_FLOAT64_C( -315.45), SIMDE_FLOAT64_C( 193.79)),
2384 simde_mm_set_pd(SIMDE_FLOAT64_C( -204.45), SIMDE_FLOAT64_C( 887.13)),
2385 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -315.45), SIMDE_FLOAT64_C( 193.79)), simde_mm_setzero_pd()) },
2386 { simde_mm_set_pd(SIMDE_FLOAT64_C( -670.47), SIMDE_FLOAT64_C( 937.31)),
2387 simde_mm_set_pd(SIMDE_FLOAT64_C( 343.22), SIMDE_FLOAT64_C( -308.01)),
2388 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -670.47), SIMDE_FLOAT64_C( 937.31)), simde_x_mm_setone_pd()) },
2389 { simde_mm_set_pd(SIMDE_FLOAT64_C( -903.63), SIMDE_FLOAT64_C( -850.53)),
2390 simde_mm_set_pd(SIMDE_FLOAT64_C( -838.64), SIMDE_FLOAT64_C( -936.46)),
2391 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -903.63), SIMDE_FLOAT64_C( -850.53)), simde_x_mm_setone_pd()) },
2392 { simde_mm_set_pd(SIMDE_FLOAT64_C( 236.33), SIMDE_FLOAT64_C( 126.98)),
2393 simde_mm_set_pd(SIMDE_FLOAT64_C( 872.82), SIMDE_FLOAT64_C( -512.42)),
2394 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 236.33), SIMDE_FLOAT64_C( 126.98)), simde_x_mm_setone_pd()) },
2395 { simde_mm_set_pd(SIMDE_FLOAT64_C( 811.87), SIMDE_FLOAT64_C( -15.62)),
2396 simde_mm_set_pd(SIMDE_FLOAT64_C( -983.99), SIMDE_FLOAT64_C( 351.32)),
2397 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 811.87), SIMDE_FLOAT64_C( -15.62)), simde_mm_setzero_pd()) },
2398 { simde_mm_set_pd(SIMDE_FLOAT64_C( 42.47), SIMDE_FLOAT64_C( -523.00)),
2399 simde_mm_set_pd(SIMDE_FLOAT64_C( 286.68), SIMDE_FLOAT64_C( 254.00)),
2400 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 42.47), SIMDE_FLOAT64_C( -523.00)), simde_mm_setzero_pd()) },
2401 { simde_mm_set_pd(SIMDE_FLOAT64_C( -219.91), SIMDE_FLOAT64_C( -253.29)),
2402 simde_mm_set_pd(SIMDE_FLOAT64_C( -554.73), SIMDE_FLOAT64_C( 225.44)),
2403 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -219.91), SIMDE_FLOAT64_C( -253.29)), simde_mm_setzero_pd()) },
2404 { simde_mm_set_pd(SIMDE_FLOAT64_C( -901.30), SIMDE_FLOAT64_C( -538.38)),
2405 simde_mm_set_pd(SIMDE_FLOAT64_C( -584.99), SIMDE_FLOAT64_C( 91.26)),
2406 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -901.30), SIMDE_FLOAT64_C( -538.38)), simde_mm_setzero_pd()) }
2407 };
2408
2409 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2410 simde__m128d r = simde_mm_cmpge_sd(test_vec[i].a, test_vec[i].b);
2411 simde_assert_m128d_equal(r, test_vec[i].r);
2412 }
2413
2414 return 0;
2415 }
2416
2417 static int
test_simde_mm_cmpnge_pd(SIMDE_MUNIT_TEST_ARGS)2418 test_simde_mm_cmpnge_pd(SIMDE_MUNIT_TEST_ARGS) {
2419 const struct {
2420 simde__m128d a;
2421 simde__m128d b;
2422 simde__m128d r;
2423 } test_vec[8] = {
2424 { simde_mm_set_pd(SIMDE_FLOAT64_C( 65.48), SIMDE_FLOAT64_C( -195.60)),
2425 simde_mm_set_pd(SIMDE_FLOAT64_C( 65.48), SIMDE_FLOAT64_C( 18.27)),
2426 simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2427 { simde_mm_set_pd(SIMDE_FLOAT64_C( -563.65), SIMDE_FLOAT64_C( 884.03)),
2428 simde_mm_set_pd(SIMDE_FLOAT64_C( 467.71), SIMDE_FLOAT64_C( -906.63)),
2429 simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2430 { simde_mm_set_pd(SIMDE_FLOAT64_C( -553.85), SIMDE_FLOAT64_C( 49.09)),
2431 simde_mm_set_pd(SIMDE_FLOAT64_C( 731.88), SIMDE_FLOAT64_C( 974.91)),
2432 simde_x_mm_setone_pd() },
2433 { simde_mm_set_pd(SIMDE_FLOAT64_C( 492.98), SIMDE_FLOAT64_C( 64.21)),
2434 simde_mm_set_pd(SIMDE_FLOAT64_C( -392.36), SIMDE_FLOAT64_C( -188.43)),
2435 simde_mm_setzero_pd() },
2436 { simde_mm_set_pd(SIMDE_FLOAT64_C( -248.75), SIMDE_FLOAT64_C( -404.39)),
2437 simde_mm_set_pd(SIMDE_FLOAT64_C( -495.92), SIMDE_FLOAT64_C( -819.81)),
2438 simde_mm_setzero_pd() },
2439 { simde_mm_set_pd(SIMDE_FLOAT64_C( -932.57), SIMDE_FLOAT64_C( 741.27)),
2440 simde_mm_set_pd(SIMDE_FLOAT64_C( -307.42), SIMDE_FLOAT64_C( 170.69)),
2441 simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2442 { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.12), SIMDE_FLOAT64_C( -161.45)),
2443 simde_mm_set_pd(SIMDE_FLOAT64_C( -939.12), SIMDE_FLOAT64_C( -161.45)),
2444 simde_mm_setzero_pd() },
2445 { simde_mm_set_pd(SIMDE_FLOAT64_C( -900.20), SIMDE_FLOAT64_C( -314.63)),
2446 simde_mm_set_pd(SIMDE_FLOAT64_C( 138.12), SIMDE_FLOAT64_C( 517.19)),
2447 simde_x_mm_setone_pd() }
2448 };
2449
2450 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2451 simde__m128d r = simde_mm_cmpnge_pd(test_vec[i].a, test_vec[i].b);
2452 simde_assert_m128d_equal(r, test_vec[i].r);
2453 }
2454
2455 return 0;
2456 }
2457
2458 static int
test_simde_mm_cmpnge_sd(SIMDE_MUNIT_TEST_ARGS)2459 test_simde_mm_cmpnge_sd(SIMDE_MUNIT_TEST_ARGS) {
2460 const struct {
2461 simde__m128d a;
2462 simde__m128d b;
2463 simde__m128d r;
2464 } test_vec[8] = {
2465 { simde_mm_set_pd(SIMDE_FLOAT64_C( -695.66), SIMDE_FLOAT64_C( 678.35)),
2466 simde_mm_set_pd(SIMDE_FLOAT64_C( 356.43), SIMDE_FLOAT64_C( 495.31)),
2467 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -695.66), SIMDE_FLOAT64_C( 678.35)), simde_mm_setzero_pd()) },
2468 { simde_mm_set_pd(SIMDE_FLOAT64_C( -72.63), SIMDE_FLOAT64_C( 895.56)),
2469 simde_mm_set_pd(SIMDE_FLOAT64_C( -885.88), SIMDE_FLOAT64_C( 947.04)),
2470 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -72.63), SIMDE_FLOAT64_C( 895.56)), simde_x_mm_setone_pd()) },
2471 { simde_mm_set_pd(SIMDE_FLOAT64_C( 72.92), SIMDE_FLOAT64_C( -711.12)),
2472 simde_mm_set_pd(SIMDE_FLOAT64_C( -242.49), SIMDE_FLOAT64_C( -686.51)),
2473 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 72.92), SIMDE_FLOAT64_C( -711.12)), simde_x_mm_setone_pd()) },
2474 { simde_mm_set_pd(SIMDE_FLOAT64_C( 520.17), SIMDE_FLOAT64_C( 176.32)),
2475 simde_mm_set_pd(SIMDE_FLOAT64_C( -442.78), SIMDE_FLOAT64_C( -956.19)),
2476 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 520.17), SIMDE_FLOAT64_C( 176.32)), simde_mm_setzero_pd()) },
2477 { simde_mm_set_pd(SIMDE_FLOAT64_C( 949.11), SIMDE_FLOAT64_C( 112.35)),
2478 simde_mm_set_pd(SIMDE_FLOAT64_C( -212.07), SIMDE_FLOAT64_C( 851.84)),
2479 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 949.11), SIMDE_FLOAT64_C( 112.35)), simde_x_mm_setone_pd()) },
2480 { simde_mm_set_pd(SIMDE_FLOAT64_C( -142.01), SIMDE_FLOAT64_C( -216.70)),
2481 simde_mm_set_pd(SIMDE_FLOAT64_C( -467.63), SIMDE_FLOAT64_C( 481.36)),
2482 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -142.01), SIMDE_FLOAT64_C( -216.70)), simde_x_mm_setone_pd()) },
2483 { simde_mm_set_pd(SIMDE_FLOAT64_C( 918.90), SIMDE_FLOAT64_C( 481.59)),
2484 simde_mm_set_pd(SIMDE_FLOAT64_C( -147.11), SIMDE_FLOAT64_C( 677.03)),
2485 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 918.90), SIMDE_FLOAT64_C( 481.59)), simde_x_mm_setone_pd()) },
2486 { simde_mm_set_pd(SIMDE_FLOAT64_C( -143.49), SIMDE_FLOAT64_C( 447.22)),
2487 simde_mm_set_pd(SIMDE_FLOAT64_C( 50.06), SIMDE_FLOAT64_C( 827.25)),
2488 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -143.49), SIMDE_FLOAT64_C( 447.22)), simde_x_mm_setone_pd()) }
2489 };
2490
2491 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2492 simde__m128d r = simde_mm_cmpnge_sd(test_vec[i].a, test_vec[i].b);
2493 simde_assert_m128d_equal(r, test_vec[i].r);
2494 }
2495
2496 return 0;
2497 }
2498
2499 static int
test_simde_mm_cmpord_pd(SIMDE_MUNIT_TEST_ARGS)2500 test_simde_mm_cmpord_pd(SIMDE_MUNIT_TEST_ARGS) {
2501 const struct {
2502 simde__m128d a;
2503 simde__m128d b;
2504 simde__m128d r;
2505 } test_vec[8] = {
2506 { simde_mm_set_pd(SIMDE_FLOAT64_C( 107.72), SIMDE_FLOAT64_C( -915.48)),
2507 simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -303.84)),
2508 simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2509 { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 173.20)),
2510 simde_mm_set_pd(SIMDE_FLOAT64_C( -817.33), SIMDE_FLOAT64_C( 659.40)),
2511 simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2512 { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_MATH_NAN),
2513 simde_mm_set_pd(SIMDE_FLOAT64_C( -425.32), SIMDE_FLOAT64_C( 993.95)),
2514 simde_mm_setzero_pd() },
2515 { simde_mm_set_pd(SIMDE_FLOAT64_C( -650.75), SIMDE_MATH_NAN),
2516 simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -971.81)),
2517 simde_mm_setzero_pd() },
2518 { simde_mm_set_pd(SIMDE_FLOAT64_C( -761.29), SIMDE_FLOAT64_C( -694.76)),
2519 simde_mm_set_pd(SIMDE_FLOAT64_C( -709.09), SIMDE_FLOAT64_C( 614.12)),
2520 simde_x_mm_setone_pd() },
2521 { simde_mm_set_pd(SIMDE_FLOAT64_C( 498.19), SIMDE_FLOAT64_C( -379.74)),
2522 simde_mm_set_pd(SIMDE_FLOAT64_C( -247.48), SIMDE_FLOAT64_C( -578.21)),
2523 simde_x_mm_setone_pd() },
2524 { simde_mm_set_pd(SIMDE_FLOAT64_C( 101.51), SIMDE_FLOAT64_C( 387.46)),
2525 simde_mm_set_pd(SIMDE_FLOAT64_C( 215.97), SIMDE_FLOAT64_C( 173.76)),
2526 simde_x_mm_setone_pd() },
2527 { simde_mm_set_pd(SIMDE_FLOAT64_C( 729.13), SIMDE_FLOAT64_C( 771.13)),
2528 simde_mm_set_pd(SIMDE_FLOAT64_C( 902.43), SIMDE_FLOAT64_C( -416.43)),
2529 simde_x_mm_setone_pd() }
2530 };
2531
2532 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2533 simde__m128d r = simde_mm_cmpord_pd(test_vec[i].a, test_vec[i].b);
2534 simde_assert_m128d_equal(r, test_vec[i].r);
2535 }
2536
2537 return 0;
2538 }
2539
2540 static int
test_simde_mm_cmpord_sd(SIMDE_MUNIT_TEST_ARGS)2541 test_simde_mm_cmpord_sd(SIMDE_MUNIT_TEST_ARGS) {
2542 const struct {
2543 simde__m128d a;
2544 simde__m128d b;
2545 simde__m128d r;
2546 } test_vec[] = {
2547 { simde_mm_set_pd(SIMDE_FLOAT64_C( 602.71), SIMDE_FLOAT64_C( -732.62)),
2548 simde_mm_set_pd(SIMDE_FLOAT64_C( 116.21), SIMDE_FLOAT64_C( -560.07)),
2549 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 602.71), SIMDE_FLOAT64_C( -732.62)), simde_x_mm_setone_pd()) },
2550 { simde_mm_set_pd(SIMDE_FLOAT64_C( 947.95), SIMDE_MATH_NAN),
2551 simde_mm_set_pd(SIMDE_FLOAT64_C( -66.03), SIMDE_FLOAT64_C( -86.78)),
2552 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 947.95), SIMDE_FLOAT64_C( 775.29)), simde_mm_setzero_pd()) },
2553 { simde_mm_set_pd(SIMDE_FLOAT64_C( -455.06), SIMDE_FLOAT64_C( 579.65)),
2554 simde_mm_set_pd(SIMDE_FLOAT64_C( -960.88), SIMDE_MATH_NAN),
2555 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -455.06), SIMDE_FLOAT64_C( 579.65)), simde_mm_setzero_pd()) },
2556 { simde_mm_set_pd(SIMDE_FLOAT64_C( 782.89), SIMDE_MATH_NAN),
2557 simde_mm_set_pd(SIMDE_FLOAT64_C( -540.96), SIMDE_MATH_NAN),
2558 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 782.89), SIMDE_FLOAT64_C( -266.22)), simde_mm_setzero_pd()) },
2559 { simde_mm_set_pd(SIMDE_FLOAT64_C( -750.41), SIMDE_FLOAT64_C( -624.09)),
2560 simde_mm_set_pd(SIMDE_FLOAT64_C( -599.13), SIMDE_FLOAT64_C( 704.00)),
2561 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -750.41), SIMDE_FLOAT64_C( -624.09)), simde_x_mm_setone_pd()) },
2562 { simde_mm_set_pd(SIMDE_FLOAT64_C( 149.22), SIMDE_FLOAT64_C( -876.24)),
2563 simde_mm_set_pd(SIMDE_FLOAT64_C( 871.40), SIMDE_FLOAT64_C( 321.55)),
2564 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 149.22), SIMDE_FLOAT64_C( -876.24)), simde_x_mm_setone_pd()) },
2565 { simde_mm_set_pd(SIMDE_FLOAT64_C( -822.79), SIMDE_FLOAT64_C( 890.31)),
2566 simde_mm_set_pd(SIMDE_FLOAT64_C( -260.78), SIMDE_FLOAT64_C( 386.76)),
2567 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -822.79), SIMDE_FLOAT64_C( 890.31)), simde_x_mm_setone_pd()) },
2568 { simde_mm_set_pd(SIMDE_FLOAT64_C( -370.89), SIMDE_FLOAT64_C( -622.25)),
2569 simde_mm_set_pd(SIMDE_FLOAT64_C( 587.16), SIMDE_FLOAT64_C( -811.86)),
2570 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -370.89), SIMDE_FLOAT64_C( -622.25)), simde_x_mm_setone_pd()) }
2571 };
2572
2573 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2574 simde__m128d r = simde_mm_cmpord_sd(test_vec[i].a, test_vec[i].b);
2575 simde_assert_m128d_equal(r, test_vec[i].r);
2576 }
2577
2578 return 0;
2579 }
2580
2581 static int
test_simde_mm_cmpunord_pd(SIMDE_MUNIT_TEST_ARGS)2582 test_simde_mm_cmpunord_pd(SIMDE_MUNIT_TEST_ARGS) {
2583 const struct {
2584 simde__m128d a;
2585 simde__m128d b;
2586 simde__m128d r;
2587 } test_vec[8] = {
2588 { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 670.49)),
2589 simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 826.75)),
2590 simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2591 { simde_mm_set_pd(SIMDE_FLOAT64_C( -774.95), SIMDE_MATH_NAN),
2592 simde_mm_set_pd(SIMDE_FLOAT64_C( 247.71), SIMDE_MATH_NAN),
2593 simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2594 { simde_mm_set_pd(SIMDE_FLOAT64_C( -88.77), SIMDE_FLOAT64_C( 116.09)),
2595 simde_mm_set_pd(SIMDE_FLOAT64_C( -32.79), SIMDE_FLOAT64_C( -442.07)),
2596 simde_mm_setzero_pd() },
2597 { simde_mm_set_pd(SIMDE_FLOAT64_C( 71.71), SIMDE_FLOAT64_C( 549.42)),
2598 simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -288.27)),
2599 simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2600 { simde_mm_set_pd(SIMDE_FLOAT64_C( -266.24), SIMDE_FLOAT64_C( -147.24)),
2601 simde_mm_set_pd(SIMDE_FLOAT64_C( 900.46), SIMDE_FLOAT64_C( -288.71)),
2602 simde_mm_setzero_pd() },
2603 { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_MATH_NAN),
2604 simde_mm_set_pd(SIMDE_FLOAT64_C( 196.30), SIMDE_MATH_NAN),
2605 simde_x_mm_setone_pd() },
2606 { simde_mm_set_pd(SIMDE_FLOAT64_C( -447.36), SIMDE_FLOAT64_C( 236.69)),
2607 simde_mm_set_pd(SIMDE_FLOAT64_C( -774.85), SIMDE_FLOAT64_C( -611.68)),
2608 simde_mm_setzero_pd() },
2609 { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_MATH_NAN),
2610 simde_mm_set_pd(SIMDE_FLOAT64_C( 711.66), SIMDE_FLOAT64_C( -751.40)),
2611 simde_x_mm_setone_pd() }
2612 };
2613
2614 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2615 simde__m128d r = simde_mm_cmpunord_pd(test_vec[i].a, test_vec[i].b);
2616 simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(test_vec[i].r));
2617 }
2618
2619 return 0;
2620 }
2621
2622 static int
test_simde_mm_cmpunord_sd(SIMDE_MUNIT_TEST_ARGS)2623 test_simde_mm_cmpunord_sd(SIMDE_MUNIT_TEST_ARGS) {
2624 const struct {
2625 simde__m128d a;
2626 simde__m128d b;
2627 simde__m128d r;
2628 } test_vec[8] = {
2629 { simde_mm_set_pd(SIMDE_FLOAT64_C( -580.90), SIMDE_FLOAT64_C( 757.46)),
2630 simde_mm_set_pd(SIMDE_FLOAT64_C( -779.63), SIMDE_FLOAT64_C( 96.79)),
2631 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -580.90), SIMDE_FLOAT64_C( 757.46)), simde_mm_setzero_pd()) },
2632 { simde_mm_set_pd(SIMDE_FLOAT64_C( -607.89), SIMDE_MATH_NAN),
2633 simde_mm_set_pd(SIMDE_FLOAT64_C( 751.46), SIMDE_FLOAT64_C( 753.64)),
2634 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -607.89), SIMDE_FLOAT64_C( -882.75)), simde_x_mm_setone_pd()) },
2635 { simde_mm_set_pd(SIMDE_FLOAT64_C( 177.62), SIMDE_FLOAT64_C( -618.39)),
2636 simde_mm_set_pd(SIMDE_FLOAT64_C( -958.41), SIMDE_MATH_NAN),
2637 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 177.62), SIMDE_FLOAT64_C( -618.39)), simde_x_mm_setone_pd()) },
2638 { simde_mm_set_pd(SIMDE_FLOAT64_C( 910.88), SIMDE_MATH_NAN),
2639 simde_mm_set_pd(SIMDE_FLOAT64_C( -924.01), SIMDE_MATH_NAN),
2640 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 910.88), SIMDE_FLOAT64_C( 313.76)), simde_x_mm_setone_pd()) },
2641 { simde_mm_set_pd(SIMDE_FLOAT64_C( -778.12), SIMDE_FLOAT64_C( -472.40)),
2642 simde_mm_set_pd(SIMDE_FLOAT64_C( 400.92), SIMDE_FLOAT64_C( -453.41)),
2643 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -778.12), SIMDE_FLOAT64_C( -472.40)), simde_mm_setzero_pd()) },
2644 { simde_mm_set_pd(SIMDE_FLOAT64_C( 933.47), SIMDE_FLOAT64_C( -426.60)),
2645 simde_mm_set_pd(SIMDE_FLOAT64_C( 836.37), SIMDE_FLOAT64_C( 329.66)),
2646 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( 933.47), SIMDE_FLOAT64_C( -426.60)), simde_mm_setzero_pd()) },
2647 { simde_mm_set_pd(SIMDE_FLOAT64_C( -563.76), SIMDE_FLOAT64_C( 455.35)),
2648 simde_mm_set_pd(SIMDE_FLOAT64_C( -169.32), SIMDE_FLOAT64_C( -459.10)),
2649 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -563.76), SIMDE_FLOAT64_C( 455.35)), simde_mm_setzero_pd()) },
2650 { simde_mm_set_pd(SIMDE_FLOAT64_C( -117.48), SIMDE_FLOAT64_C( -934.82)),
2651 simde_mm_set_pd(SIMDE_FLOAT64_C( 177.09), SIMDE_FLOAT64_C( 194.89)),
2652 simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -117.48), SIMDE_FLOAT64_C( -934.82)), simde_mm_setzero_pd()) }
2653 };
2654
2655 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2656 simde__m128d r = simde_mm_cmpunord_sd(test_vec[i].a, test_vec[i].b);
2657 simde_assert_m128d_equal(r, test_vec[i].r);
2658 }
2659
2660 return 0;
2661 }
2662
2663 static int
test_simde_mm_comieq_sd(SIMDE_MUNIT_TEST_ARGS)2664 test_simde_mm_comieq_sd(SIMDE_MUNIT_TEST_ARGS) {
2665 const struct {
2666 simde__m128d a;
2667 simde__m128d b;
2668 int r;
2669 } test_vec[8] = {
2670 { simde_mm_set_pd(SIMDE_FLOAT64_C( -969.61), SIMDE_FLOAT64_C( 839.23)),
2671 simde_mm_set_pd(SIMDE_FLOAT64_C( -969.61), SIMDE_FLOAT64_C( -432.69)),
2672 0 },
2673 { simde_mm_set_pd(SIMDE_FLOAT64_C( 281.50), SIMDE_FLOAT64_C( -752.55)),
2674 simde_mm_set_pd(SIMDE_FLOAT64_C( 281.50), SIMDE_FLOAT64_C( -752.55)),
2675 1 },
2676 { simde_mm_set_pd(SIMDE_FLOAT64_C( 590.09), SIMDE_FLOAT64_C( 270.42)),
2677 simde_mm_set_pd(SIMDE_FLOAT64_C( -206.33), SIMDE_FLOAT64_C( 270.42)),
2678 1 },
2679 { simde_mm_set_pd(SIMDE_FLOAT64_C( 412.70), SIMDE_FLOAT64_C( -500.58)),
2680 simde_mm_set_pd(SIMDE_FLOAT64_C( 145.06), SIMDE_FLOAT64_C( 763.45)),
2681 0 },
2682 { simde_mm_set_pd(SIMDE_FLOAT64_C( -374.23), SIMDE_FLOAT64_C( 380.82)),
2683 simde_mm_set_pd(SIMDE_FLOAT64_C( -374.23), SIMDE_FLOAT64_C( 380.82)),
2684 1 },
2685 { simde_mm_set_pd(SIMDE_FLOAT64_C( -875.64), SIMDE_FLOAT64_C( 30.13)),
2686 simde_mm_set_pd(SIMDE_FLOAT64_C( -823.83), SIMDE_FLOAT64_C( 30.13)),
2687 1 },
2688 { simde_mm_set_pd(SIMDE_FLOAT64_C( 890.78), SIMDE_FLOAT64_C( -652.66)),
2689 simde_mm_set_pd(SIMDE_FLOAT64_C( 719.69), SIMDE_FLOAT64_C( -685.53)),
2690 0 },
2691 { simde_mm_set_pd(SIMDE_FLOAT64_C( 740.88), SIMDE_FLOAT64_C( 116.37)),
2692 simde_mm_set_pd(SIMDE_FLOAT64_C( -528.65), SIMDE_FLOAT64_C( 536.46)),
2693 0 }
2694 };
2695
2696 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2697 int r = simde_mm_comieq_sd(test_vec[i].a, test_vec[i].b);
2698 simde_assert_equal_i(r, test_vec[i].r);
2699 }
2700
2701 return 0;
2702 }
2703
2704 static int
test_simde_mm_comige_sd(SIMDE_MUNIT_TEST_ARGS)2705 test_simde_mm_comige_sd(SIMDE_MUNIT_TEST_ARGS) {
2706 const struct {
2707 simde__m128d a;
2708 simde__m128d b;
2709 int r;
2710 } test_vec[8] = {
2711 { simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 552.43)),
2712 simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 267.88)),
2713 1 },
2714 { simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( -921.70)),
2715 simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( 330.81)),
2716 0 },
2717 { simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( -938.03)),
2718 simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( 970.01)),
2719 0 },
2720 { simde_mm_set_pd(SIMDE_FLOAT64_C( -158.01), SIMDE_FLOAT64_C( 635.18)),
2721 simde_mm_set_pd(SIMDE_FLOAT64_C( -394.88), SIMDE_FLOAT64_C( -19.73)),
2722 1 },
2723 { simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -429.43)),
2724 simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -32.37)),
2725 0 },
2726 { simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( -241.62)),
2727 simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( 500.55)),
2728 0 },
2729 { simde_mm_set_pd(SIMDE_FLOAT64_C( -667.19), SIMDE_FLOAT64_C( 338.98)),
2730 simde_mm_set_pd(SIMDE_FLOAT64_C( 225.94), SIMDE_FLOAT64_C( 338.98)),
2731 1 },
2732 { simde_mm_set_pd(SIMDE_FLOAT64_C( 890.13), SIMDE_FLOAT64_C( -203.09)),
2733 simde_mm_set_pd(SIMDE_FLOAT64_C( -221.49), SIMDE_FLOAT64_C( 304.99)),
2734 0 }
2735 };
2736
2737 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2738 int r = simde_mm_comige_sd(test_vec[i].a, test_vec[i].b);
2739 simde_assert_equal_i(r, test_vec[i].r);
2740 }
2741
2742 return 0;
2743 }
2744
2745 static int
test_simde_mm_comigt_sd(SIMDE_MUNIT_TEST_ARGS)2746 test_simde_mm_comigt_sd(SIMDE_MUNIT_TEST_ARGS) {
2747 const struct {
2748 simde__m128d a;
2749 simde__m128d b;
2750 int r;
2751 } test_vec[8] = {
2752 { simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 552.43)),
2753 simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 267.88)),
2754 1 },
2755 { simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( -921.70)),
2756 simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( 330.81)),
2757 0 },
2758 { simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( -938.03)),
2759 simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( 970.01)),
2760 0 },
2761 { simde_mm_set_pd(SIMDE_FLOAT64_C( -158.01), SIMDE_FLOAT64_C( 635.18)),
2762 simde_mm_set_pd(SIMDE_FLOAT64_C( -394.88), SIMDE_FLOAT64_C( -19.73)),
2763 1 },
2764 { simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -429.43)),
2765 simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -32.37)),
2766 0 },
2767 { simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( -241.62)),
2768 simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( 500.55)),
2769 0 },
2770 { simde_mm_set_pd(SIMDE_FLOAT64_C( -667.19), SIMDE_FLOAT64_C( 338.98)),
2771 simde_mm_set_pd(SIMDE_FLOAT64_C( 225.94), SIMDE_FLOAT64_C( 338.98)),
2772 0 },
2773 { simde_mm_set_pd(SIMDE_FLOAT64_C( 890.13), SIMDE_FLOAT64_C( -203.09)),
2774 simde_mm_set_pd(SIMDE_FLOAT64_C( -221.49), SIMDE_FLOAT64_C( 304.99)),
2775 0 }
2776 };
2777
2778 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2779 int r = simde_mm_comigt_sd(test_vec[i].a, test_vec[i].b);
2780 simde_assert_equal_i(r, test_vec[i].r);
2781 }
2782
2783 return 0;
2784 }
2785
2786 static int
test_simde_mm_comile_sd(SIMDE_MUNIT_TEST_ARGS)2787 test_simde_mm_comile_sd(SIMDE_MUNIT_TEST_ARGS) {
2788 const struct {
2789 simde__m128d a;
2790 simde__m128d b;
2791 int r;
2792 } test_vec[8] = {
2793 { simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 552.43)),
2794 simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 267.88)),
2795 0 },
2796 { simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( -921.70)),
2797 simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( 330.81)),
2798 1 },
2799 { simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( -938.03)),
2800 simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( 970.01)),
2801 1 },
2802 { simde_mm_set_pd(SIMDE_FLOAT64_C( -158.01), SIMDE_FLOAT64_C( 635.18)),
2803 simde_mm_set_pd(SIMDE_FLOAT64_C( -394.88), SIMDE_FLOAT64_C( -19.73)),
2804 0 },
2805 { simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -429.43)),
2806 simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -32.37)),
2807 1 },
2808 { simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( -241.62)),
2809 simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( 500.55)),
2810 1 },
2811 { simde_mm_set_pd(SIMDE_FLOAT64_C( -667.19), SIMDE_FLOAT64_C( 338.98)),
2812 simde_mm_set_pd(SIMDE_FLOAT64_C( 225.94), SIMDE_FLOAT64_C( 338.98)),
2813 1 },
2814 { simde_mm_set_pd(SIMDE_FLOAT64_C( 890.13), SIMDE_FLOAT64_C( -203.09)),
2815 simde_mm_set_pd(SIMDE_FLOAT64_C( -221.49), SIMDE_FLOAT64_C( 304.99)),
2816 1 }
2817 };
2818
2819 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2820 int r = simde_mm_comile_sd(test_vec[i].a, test_vec[i].b);
2821 simde_assert_equal_i(r, test_vec[i].r);
2822 }
2823
2824 return 0;
2825 }
2826
2827 static int
test_simde_mm_comilt_sd(SIMDE_MUNIT_TEST_ARGS)2828 test_simde_mm_comilt_sd(SIMDE_MUNIT_TEST_ARGS) {
2829 const struct {
2830 simde__m128d a;
2831 simde__m128d b;
2832 int r;
2833 } test_vec[8] = {
2834 { simde_mm_set_pd(SIMDE_FLOAT64_C( 766.54), SIMDE_FLOAT64_C( -69.58)),
2835 simde_mm_set_pd(SIMDE_FLOAT64_C( 185.38), SIMDE_FLOAT64_C( -69.58)),
2836 0 },
2837 { simde_mm_set_pd(SIMDE_FLOAT64_C( 489.65), SIMDE_FLOAT64_C( 372.98)),
2838 simde_mm_set_pd(SIMDE_FLOAT64_C( 489.65), SIMDE_FLOAT64_C( 372.98)),
2839 0 },
2840 { simde_mm_set_pd(SIMDE_FLOAT64_C( 79.48), SIMDE_FLOAT64_C( -168.45)),
2841 simde_mm_set_pd(SIMDE_FLOAT64_C( -648.03), SIMDE_FLOAT64_C( -710.04)),
2842 0 },
2843 { simde_mm_set_pd(SIMDE_FLOAT64_C( 907.60), SIMDE_FLOAT64_C( 955.73)),
2844 simde_mm_set_pd(SIMDE_FLOAT64_C( 907.60), SIMDE_FLOAT64_C( -965.39)),
2845 0 },
2846 { simde_mm_set_pd(SIMDE_FLOAT64_C( -237.33), SIMDE_FLOAT64_C( 558.83)),
2847 simde_mm_set_pd(SIMDE_FLOAT64_C( 415.12), SIMDE_FLOAT64_C( 558.83)),
2848 0 },
2849 { simde_mm_set_pd(SIMDE_FLOAT64_C( -796.13), SIMDE_FLOAT64_C( 18.69)),
2850 simde_mm_set_pd(SIMDE_FLOAT64_C( -796.13), SIMDE_FLOAT64_C( 18.69)),
2851 0 },
2852 { simde_mm_set_pd(SIMDE_FLOAT64_C( -380.36), SIMDE_FLOAT64_C( -737.73)),
2853 simde_mm_set_pd(SIMDE_FLOAT64_C( -380.36), SIMDE_FLOAT64_C( -737.73)),
2854 0 },
2855 { simde_mm_set_pd(SIMDE_FLOAT64_C( -975.15), SIMDE_FLOAT64_C( -296.93)),
2856 simde_mm_set_pd(SIMDE_FLOAT64_C( -975.15), SIMDE_FLOAT64_C( -296.93)),
2857 0 }
2858 };
2859
2860 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2861 int r = simde_mm_comilt_sd(test_vec[i].a, test_vec[i].b);
2862 simde_assert_equal_i(r, test_vec[i].r);
2863 }
2864
2865 return 0;
2866 }
2867
2868 static int
test_simde_mm_comineq_sd(SIMDE_MUNIT_TEST_ARGS)2869 test_simde_mm_comineq_sd(SIMDE_MUNIT_TEST_ARGS) {
2870 const struct {
2871 simde__m128d a;
2872 simde__m128d b;
2873 int r;
2874 } test_vec[8] = {
2875 { simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 552.43)),
2876 simde_mm_set_pd(SIMDE_FLOAT64_C( 105.66), SIMDE_FLOAT64_C( 267.88)),
2877 1 },
2878 { simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( -921.70)),
2879 simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( 330.81)),
2880 1 },
2881 { simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( -938.03)),
2882 simde_mm_set_pd(SIMDE_FLOAT64_C( 865.75), SIMDE_FLOAT64_C( 970.01)),
2883 1 },
2884 { simde_mm_set_pd(SIMDE_FLOAT64_C( -158.01), SIMDE_FLOAT64_C( 635.18)),
2885 simde_mm_set_pd(SIMDE_FLOAT64_C( -394.88), SIMDE_FLOAT64_C( -19.73)),
2886 1 },
2887 { simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -429.43)),
2888 simde_mm_set_pd(SIMDE_FLOAT64_C( -29.19), SIMDE_FLOAT64_C( -32.37)),
2889 1 },
2890 { simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( -241.62)),
2891 simde_mm_set_pd(SIMDE_FLOAT64_C( 507.45), SIMDE_FLOAT64_C( 500.55)),
2892 1 },
2893 { simde_mm_set_pd(SIMDE_FLOAT64_C( -667.19), SIMDE_FLOAT64_C( 338.98)),
2894 simde_mm_set_pd(SIMDE_FLOAT64_C( 225.94), SIMDE_FLOAT64_C( 338.98)),
2895 0 },
2896 { simde_mm_set_pd(SIMDE_FLOAT64_C( 890.13), SIMDE_FLOAT64_C( -203.09)),
2897 simde_mm_set_pd(SIMDE_FLOAT64_C( -221.49), SIMDE_FLOAT64_C( 304.99)),
2898 1 }
2899 };
2900
2901 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2902 int r = simde_mm_comineq_sd(test_vec[i].a, test_vec[i].b);
2903 simde_assert_equal_i(r, test_vec[i].r);
2904 }
2905
2906 return 0;
2907 }
2908
2909 static int
test_simde_x_mm_copysign_pd(SIMDE_MUNIT_TEST_ARGS)2910 test_simde_x_mm_copysign_pd (SIMDE_MUNIT_TEST_ARGS) {
2911 static const struct {
2912 const simde_float64 dest[2];
2913 const simde_float64 src[2];
2914 const simde_float64 r[2];
2915 } test_vec[] = {
2916 { { SIMDE_FLOAT64_C( -182.01), SIMDE_FLOAT64_C( 309.30) },
2917 { SIMDE_FLOAT64_C( -125.98), SIMDE_FLOAT64_C( -334.42) },
2918 { SIMDE_FLOAT64_C( -182.01), SIMDE_FLOAT64_C( -309.30) } },
2919 { { SIMDE_FLOAT64_C( -339.97), SIMDE_FLOAT64_C( -147.14) },
2920 { SIMDE_FLOAT64_C( 534.39), SIMDE_FLOAT64_C( -377.91) },
2921 { SIMDE_FLOAT64_C( 339.97), SIMDE_FLOAT64_C( -147.14) } },
2922 { { SIMDE_FLOAT64_C( -466.30), SIMDE_FLOAT64_C( 794.64) },
2923 { SIMDE_FLOAT64_C( 936.51), SIMDE_FLOAT64_C( -627.08) },
2924 { SIMDE_FLOAT64_C( 466.30), SIMDE_FLOAT64_C( -794.64) } },
2925 { { SIMDE_FLOAT64_C( 644.80), SIMDE_FLOAT64_C( 412.58) },
2926 { SIMDE_FLOAT64_C( -738.56), SIMDE_FLOAT64_C( -987.18) },
2927 { SIMDE_FLOAT64_C( -644.80), SIMDE_FLOAT64_C( -412.58) } },
2928 { { SIMDE_FLOAT64_C( -54.12), SIMDE_FLOAT64_C( -858.45) },
2929 { SIMDE_FLOAT64_C( -554.31), SIMDE_FLOAT64_C( 274.31) },
2930 { SIMDE_FLOAT64_C( -54.12), SIMDE_FLOAT64_C( 858.45) } },
2931 { { SIMDE_FLOAT64_C( -106.06), SIMDE_FLOAT64_C( -482.09) },
2932 { SIMDE_FLOAT64_C( -505.26), SIMDE_FLOAT64_C( -310.15) },
2933 { SIMDE_FLOAT64_C( -106.06), SIMDE_FLOAT64_C( -482.09) } },
2934 { { SIMDE_FLOAT64_C( 726.18), SIMDE_FLOAT64_C( 941.28) },
2935 { SIMDE_FLOAT64_C( -987.65), SIMDE_FLOAT64_C( -463.18) },
2936 { SIMDE_FLOAT64_C( -726.18), SIMDE_FLOAT64_C( -941.28) } },
2937 { { SIMDE_FLOAT64_C( -907.04), SIMDE_FLOAT64_C( -842.82) },
2938 { SIMDE_FLOAT64_C( -124.70), SIMDE_FLOAT64_C( -89.06) },
2939 { SIMDE_FLOAT64_C( -907.04), SIMDE_FLOAT64_C( -842.82) } }
2940 };
2941
2942 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
2943 simde__m128d dest = simde_mm_loadu_pd(test_vec[i].dest);
2944 simde__m128d src = simde_mm_loadu_pd(test_vec[i].src);
2945 simde__m128d r = simde_x_mm_copysign_pd(dest, src);
2946 simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1);
2947 }
2948
2949 return 0;
2950 }
2951
2952 static int
test_simde_x_mm_xorsign_pd(SIMDE_MUNIT_TEST_ARGS)2953 test_simde_x_mm_xorsign_pd (SIMDE_MUNIT_TEST_ARGS) {
2954 static const struct {
2955 const simde_float64 a[2];
2956 const simde_float64 b[2];
2957 const simde_float64 r[2];
2958 } test_vec[] = {
2959 { { SIMDE_FLOAT64_C( -998.44), SIMDE_FLOAT64_C( -179.45) },
2960 { SIMDE_FLOAT64_C( 34.66), SIMDE_FLOAT64_C( 254.98) },
2961 { SIMDE_FLOAT64_C( -998.44), SIMDE_FLOAT64_C( -179.45) } },
2962 { { SIMDE_FLOAT64_C( -220.74), SIMDE_FLOAT64_C( 718.77) },
2963 { SIMDE_FLOAT64_C( -648.69), SIMDE_FLOAT64_C( -598.91) },
2964 { SIMDE_FLOAT64_C( 220.74), SIMDE_FLOAT64_C( -718.77) } },
2965 { { SIMDE_FLOAT64_C( 84.66), SIMDE_FLOAT64_C( -602.04) },
2966 { SIMDE_FLOAT64_C( 631.55), SIMDE_FLOAT64_C( -486.59) },
2967 { SIMDE_FLOAT64_C( 84.66), SIMDE_FLOAT64_C( 602.04) } },
2968 { { SIMDE_FLOAT64_C( 570.81), SIMDE_FLOAT64_C( 368.00) },
2969 { SIMDE_FLOAT64_C( 372.19), SIMDE_FLOAT64_C( -832.84) },
2970 { SIMDE_FLOAT64_C( 570.81), SIMDE_FLOAT64_C( -368.00) } },
2971 { { SIMDE_FLOAT64_C( -996.05), SIMDE_FLOAT64_C( 875.71) },
2972 { SIMDE_FLOAT64_C( 198.29), SIMDE_FLOAT64_C( -187.87) },
2973 { SIMDE_FLOAT64_C( -996.05), SIMDE_FLOAT64_C( -875.71) } },
2974 { { SIMDE_FLOAT64_C( -462.20), SIMDE_FLOAT64_C( -277.60) },
2975 { SIMDE_FLOAT64_C( 841.75), SIMDE_FLOAT64_C( 127.22) },
2976 { SIMDE_FLOAT64_C( -462.20), SIMDE_FLOAT64_C( -277.60) } },
2977 { { SIMDE_FLOAT64_C( -669.20), SIMDE_FLOAT64_C( -206.42) },
2978 { SIMDE_FLOAT64_C( 600.14), SIMDE_FLOAT64_C( 65.01) },
2979 { SIMDE_FLOAT64_C( -669.20), SIMDE_FLOAT64_C( -206.42) } },
2980 { { SIMDE_FLOAT64_C( 159.77), SIMDE_FLOAT64_C( -896.78) },
2981 { SIMDE_FLOAT64_C( 642.72), SIMDE_FLOAT64_C( 161.33) },
2982 { SIMDE_FLOAT64_C( 159.77), SIMDE_FLOAT64_C( -896.78) } }
2983 };
2984
2985 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
2986 simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
2987 simde__m128d b = simde_mm_loadu_pd(test_vec[i].b);
2988 simde__m128d r = simde_x_mm_xorsign_pd(a, b);
2989 simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1);
2990 }
2991
2992 return 0;
2993 }
2994
2995 static int
test_simde_mm_cvtepi32_pd(SIMDE_MUNIT_TEST_ARGS)2996 test_simde_mm_cvtepi32_pd(SIMDE_MUNIT_TEST_ARGS) {
2997 const struct {
2998 simde__m128i a;
2999 simde__m128d r;
3000 } test_vec[8] = {
3001 { simde_mm_set_epi32( 1668601445, 8850426, 1726684816, -1842005323),
3002 simde_mm_set_pd(SIMDE_FLOAT64_C(1726684816.00), SIMDE_FLOAT64_C(-1842005323.00)) },
3003 { simde_mm_set_epi32(-1162443511, 1098837378, -970075414, 1210551220),
3004 simde_mm_set_pd(SIMDE_FLOAT64_C(-970075414.00), SIMDE_FLOAT64_C(1210551220.00)) },
3005 { simde_mm_set_epi32( 1014915875, 235168560, 691866984, -431325465),
3006 simde_mm_set_pd(SIMDE_FLOAT64_C(691866984.00), SIMDE_FLOAT64_C(-431325465.00)) },
3007 { simde_mm_set_epi32( 1621419008, 1286931249, -1424446000, -169673917),
3008 simde_mm_set_pd(SIMDE_FLOAT64_C(-1424446000.00), SIMDE_FLOAT64_C(-169673917.00)) },
3009 { simde_mm_set_epi32( 982570498, 31161721, 410129833, 1249524705),
3010 simde_mm_set_pd(SIMDE_FLOAT64_C(410129833.00), SIMDE_FLOAT64_C(1249524705.00)) },
3011 { simde_mm_set_epi32(-1807976526, 584564543, 1386856775, -792093051),
3012 simde_mm_set_pd(SIMDE_FLOAT64_C(1386856775.00), SIMDE_FLOAT64_C(-792093051.00)) },
3013 { simde_mm_set_epi32( 1927957259, 324939853, 1056227907, 960202603),
3014 simde_mm_set_pd(SIMDE_FLOAT64_C(1056227907.00), SIMDE_FLOAT64_C(960202603.00)) },
3015 { simde_mm_set_epi32( 2096858414, 2117774841, 250894175, 1268045519),
3016 simde_mm_set_pd(SIMDE_FLOAT64_C(250894175.00), SIMDE_FLOAT64_C(1268045519.00)) }
3017 };
3018
3019 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3020 simde__m128d r = simde_mm_cvtepi32_pd(test_vec[i].a);
3021 simde_assert_m128d_close(r, test_vec[i].r, 1);
3022 }
3023
3024 return 0;
3025 }
3026
3027 static int
test_simde_mm_cvtepi32_ps(SIMDE_MUNIT_TEST_ARGS)3028 test_simde_mm_cvtepi32_ps(SIMDE_MUNIT_TEST_ARGS) {
3029 const struct {
3030 simde__m128i a;
3031 simde__m128 r;
3032 } test_vec[8] = {
3033 { simde_mm_set_epi32( 332, -787, -79, -785),
3034 simde_mm_set_ps(SIMDE_FLOAT32_C( 332.00), SIMDE_FLOAT32_C(-787.00), SIMDE_FLOAT32_C( -79.00), SIMDE_FLOAT32_C(-785.00)) },
3035 { simde_mm_set_epi32( 394, -936, -733, -136),
3036 simde_mm_set_ps(SIMDE_FLOAT32_C( 394.00), SIMDE_FLOAT32_C(-936.00), SIMDE_FLOAT32_C(-733.00), SIMDE_FLOAT32_C(-136.00)) },
3037 { simde_mm_set_epi32( 618, -416, 310, 183),
3038 simde_mm_set_ps(SIMDE_FLOAT32_C( 618.00), SIMDE_FLOAT32_C(-416.00), SIMDE_FLOAT32_C( 310.00), SIMDE_FLOAT32_C( 183.00)) },
3039 { simde_mm_set_epi32(-748, 245, 533, -152),
3040 simde_mm_set_ps(SIMDE_FLOAT32_C(-748.00), SIMDE_FLOAT32_C( 245.00), SIMDE_FLOAT32_C( 533.00), SIMDE_FLOAT32_C(-152.00)) },
3041 { simde_mm_set_epi32( 42, 893, 849, -741),
3042 simde_mm_set_ps(SIMDE_FLOAT32_C( 42.00), SIMDE_FLOAT32_C( 893.00), SIMDE_FLOAT32_C( 849.00), SIMDE_FLOAT32_C(-741.00)) },
3043 { simde_mm_set_epi32( 657, 222, -709, -177),
3044 simde_mm_set_ps(SIMDE_FLOAT32_C( 657.00), SIMDE_FLOAT32_C( 222.00), SIMDE_FLOAT32_C(-709.00), SIMDE_FLOAT32_C(-177.00)) },
3045 { simde_mm_set_epi32( 762, -586, 196, 717),
3046 simde_mm_set_ps(SIMDE_FLOAT32_C( 762.00), SIMDE_FLOAT32_C(-586.00), SIMDE_FLOAT32_C( 196.00), SIMDE_FLOAT32_C( 717.00)) },
3047 { simde_mm_set_epi32( 322, 178, 766, -110),
3048 simde_mm_set_ps(SIMDE_FLOAT32_C( 322.00), SIMDE_FLOAT32_C( 178.00), SIMDE_FLOAT32_C( 766.00), SIMDE_FLOAT32_C(-110.00)) }
3049 };
3050
3051 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3052 simde__m128 r = simde_mm_cvtepi32_ps(test_vec[i].a);
3053 simde_assert_m128_close(r, test_vec[i].r, 1);
3054 }
3055
3056 return 0;
3057 }
3058
3059 static int
test_simde_mm_cvtpd_epi32(SIMDE_MUNIT_TEST_ARGS)3060 test_simde_mm_cvtpd_epi32 (SIMDE_MUNIT_TEST_ARGS) {
3061 static const struct {
3062 const simde_float64 a[2];
3063 const int32_t r[4];
3064 } test_vec[] = {
3065 #if !defined(SIMDE_FAST_NANS)
3066 { { SIMDE_MATH_NAN, -SIMDE_MATH_NAN },
3067 { INT32_MIN, INT32_MIN, INT32_C( 0), INT32_C( 0) } },
3068 #endif
3069 #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3070 { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1), HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100) },
3071 { INT32_MIN, INT32_C( 2147483547), INT32_C( 0), INT32_C( 0) } },
3072 { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1), HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100 },
3073 { INT32_MIN, -INT32_C( 2147483548), INT32_C( 0), INT32_C( 0) } },
3074 #endif
3075 { { SIMDE_FLOAT64_C( -220.31), SIMDE_FLOAT64_C( 685.08) },
3076 { -INT32_C( 220), INT32_C( 685), INT32_C( 0), INT32_C( 0) } },
3077 { { SIMDE_FLOAT64_C( -164.88), SIMDE_FLOAT64_C( 725.51) },
3078 { -INT32_C( 165), INT32_C( 726), INT32_C( 0), INT32_C( 0) } },
3079 { { SIMDE_FLOAT64_C( 152.74), SIMDE_FLOAT64_C( 778.03) },
3080 { INT32_C( 153), INT32_C( 778), INT32_C( 0), INT32_C( 0) } },
3081 { { SIMDE_FLOAT64_C( -801.11), SIMDE_FLOAT64_C( -331.66) },
3082 { -INT32_C( 801), -INT32_C( 332), INT32_C( 0), INT32_C( 0) } },
3083 { { SIMDE_FLOAT64_C( -834.04), SIMDE_FLOAT64_C( -51.56) },
3084 { -INT32_C( 834), -INT32_C( 52), INT32_C( 0), INT32_C( 0) } },
3085 { { SIMDE_FLOAT64_C( 737.22), SIMDE_FLOAT64_C( 205.77) },
3086 { INT32_C( 737), INT32_C( 206), INT32_C( 0), INT32_C( 0) } }
3087 };
3088
3089 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3090 simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
3091 simde__m128i r = simde_mm_cvtpd_epi32(a);
3092 simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
3093 }
3094
3095 return 0;
3096 }
3097
3098 static int
test_simde_mm_cvtpd_pi32(SIMDE_MUNIT_TEST_ARGS)3099 test_simde_mm_cvtpd_pi32 (SIMDE_MUNIT_TEST_ARGS) {
3100 static const struct {
3101 const simde_float64 a[2];
3102 const int32_t r[2];
3103 } test_vec[] = {
3104 #if !defined(SIMDE_FAST_NANS)
3105 { { SIMDE_MATH_NAN, -SIMDE_MATH_NAN },
3106 { INT32_MIN, INT32_MIN } },
3107 #endif
3108 #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3109 { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1), HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100) },
3110 { INT32_MIN, INT32_C( 2147483547) } },
3111 { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1), HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100 },
3112 { INT32_MIN, -INT32_C( 2147483548) } },
3113 #endif
3114 { { SIMDE_FLOAT64_C( -220.31), SIMDE_FLOAT64_C( 685.08) },
3115 { -INT32_C( 220), INT32_C( 685) } },
3116 { { SIMDE_FLOAT64_C( -164.88), SIMDE_FLOAT64_C( 725.51) },
3117 { -INT32_C( 165), INT32_C( 726) } },
3118 { { SIMDE_FLOAT64_C( 152.74), SIMDE_FLOAT64_C( 778.03) },
3119 { INT32_C( 153), INT32_C( 778) } },
3120 { { SIMDE_FLOAT64_C( -801.11), SIMDE_FLOAT64_C( -331.66) },
3121 { -INT32_C( 801), -INT32_C( 332) } },
3122 { { SIMDE_FLOAT64_C( -834.04), SIMDE_FLOAT64_C( -51.56) },
3123 { -INT32_C( 834), -INT32_C( 52) } },
3124 { { SIMDE_FLOAT64_C( 737.22), SIMDE_FLOAT64_C( 205.77) },
3125 { INT32_C( 737), INT32_C( 206) } }
3126 };
3127
3128 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3129 simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
3130 simde__m64 r = simde_mm_cvtpd_pi32(a);
3131 simde_test_x86_assert_equal_i32x2(r, simde_x_mm_loadu_si64(test_vec[i].r));
3132 }
3133
3134 return 0;
3135 }
3136
3137 static int
test_simde_mm_cvtpd_ps(SIMDE_MUNIT_TEST_ARGS)3138 test_simde_mm_cvtpd_ps(SIMDE_MUNIT_TEST_ARGS) {
3139 const struct {
3140 simde__m128d a;
3141 simde__m128 r;
3142 } test_vec[8] = {
3143 { simde_mm_set_pd(SIMDE_FLOAT64_C( 655.71), SIMDE_FLOAT64_C( 689.41)),
3144 simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 655.71), SIMDE_FLOAT32_C( 689.41)) },
3145 { simde_mm_set_pd(SIMDE_FLOAT64_C( -50.10), SIMDE_FLOAT64_C( -149.72)),
3146 simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -50.10), SIMDE_FLOAT32_C( -149.72)) },
3147 { simde_mm_set_pd(SIMDE_FLOAT64_C( 227.42), SIMDE_FLOAT64_C( 655.70)),
3148 simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 227.42), SIMDE_FLOAT32_C( 655.70)) },
3149 { simde_mm_set_pd(SIMDE_FLOAT64_C( -635.17), SIMDE_FLOAT64_C( 938.65)),
3150 simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -635.17), SIMDE_FLOAT32_C( 938.65)) },
3151 { simde_mm_set_pd(SIMDE_FLOAT64_C( 548.99), SIMDE_FLOAT64_C( -18.53)),
3152 simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 548.99), SIMDE_FLOAT32_C( -18.53)) },
3153 { simde_mm_set_pd(SIMDE_FLOAT64_C( -548.71), SIMDE_FLOAT64_C( 31.33)),
3154 simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -548.71), SIMDE_FLOAT32_C( 31.33)) },
3155 { simde_mm_set_pd(SIMDE_FLOAT64_C( -978.36), SIMDE_FLOAT64_C( -341.93)),
3156 simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -978.36), SIMDE_FLOAT32_C( -341.93)) },
3157 { simde_mm_set_pd(SIMDE_FLOAT64_C( 211.73), SIMDE_FLOAT64_C( 471.24)),
3158 simde_mm_set_ps(SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 211.73), SIMDE_FLOAT32_C( 471.24)) }
3159 };
3160
3161 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3162 simde__m128 r = simde_mm_cvtpd_ps(test_vec[i].a);
3163 simde_assert_m128_close(r, test_vec[i].r, 1);
3164 }
3165
3166 return 0;
3167 }
3168
3169 static int
test_simde_mm_cvtpi32_pd(SIMDE_MUNIT_TEST_ARGS)3170 test_simde_mm_cvtpi32_pd(SIMDE_MUNIT_TEST_ARGS) {
3171 const struct {
3172 simde__m64 a;
3173 simde__m128d r;
3174 } test_vec[8] = {
3175 { simde_mm_set_pi32( -42, -579),
3176 simde_mm_set_pd(SIMDE_FLOAT64_C( -42.00), SIMDE_FLOAT64_C(-579.00)) },
3177 { simde_mm_set_pi32( -633, 29),
3178 simde_mm_set_pd(SIMDE_FLOAT64_C(-633.00), SIMDE_FLOAT64_C( 29.00)) },
3179 { simde_mm_set_pi32( -149, 196),
3180 simde_mm_set_pd(SIMDE_FLOAT64_C(-149.00), SIMDE_FLOAT64_C( 196.00)) },
3181 { simde_mm_set_pi32( 308, -433),
3182 simde_mm_set_pd(SIMDE_FLOAT64_C( 308.00), SIMDE_FLOAT64_C(-433.00)) },
3183 { simde_mm_set_pi32( -881, 358),
3184 simde_mm_set_pd(SIMDE_FLOAT64_C(-881.00), SIMDE_FLOAT64_C( 358.00)) },
3185 { simde_mm_set_pi32( 723, 273),
3186 simde_mm_set_pd(SIMDE_FLOAT64_C( 723.00), SIMDE_FLOAT64_C( 273.00)) },
3187 { simde_mm_set_pi32( -182, 457),
3188 simde_mm_set_pd(SIMDE_FLOAT64_C(-182.00), SIMDE_FLOAT64_C( 457.00)) },
3189 { simde_mm_set_pi32( -239, -577),
3190 simde_mm_set_pd(SIMDE_FLOAT64_C(-239.00), SIMDE_FLOAT64_C(-577.00)) }
3191 };
3192
3193 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3194 simde__m128d r = simde_mm_cvtpi32_pd(test_vec[i].a);
3195 simde_assert_m128d_close(r, test_vec[i].r, 1);
3196 }
3197
3198 return 0;
3199 }
3200
3201 static int
test_simde_mm_cvtps_epi32(SIMDE_MUNIT_TEST_ARGS)3202 test_simde_mm_cvtps_epi32(SIMDE_MUNIT_TEST_ARGS) {
3203 static const struct {
3204 const simde_float32 a[4];
3205 const int32_t r[4];
3206 } test_vec[] = {
3207 #if !defined(SIMDE_FAST_NANS)
3208 { { SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_FLOAT32_C( 718.49), SIMDE_FLOAT32_C( -765.08) },
3209 { INT32_MIN, INT32_MIN, INT32_C( 718), -INT32_C( 765) } },
3210 #endif
3211 #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3212 { { HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
3213 HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
3214 HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
3215 HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100), },
3216 { INT32_MIN, INT32_C( 2147483520), INT32_MIN, -INT32_C( 2147483520) } },
3217 #endif
3218 #if !defined(SIMDE_FAST_ROUND_TIES)
3219 { { SIMDE_FLOAT32_C( -1.50), SIMDE_FLOAT32_C( 1.50), SIMDE_FLOAT32_C( -2.50), SIMDE_FLOAT32_C( 2.50) },
3220 { -INT32_C( 2), INT32_C( 2), -INT32_C( 2), INT32_C( 2) } },
3221 { { SIMDE_FLOAT32_C( -3.50), SIMDE_FLOAT32_C( 3.50), SIMDE_FLOAT32_C( -4.50), SIMDE_FLOAT32_C( 4.50) },
3222 { -INT32_C( 4), INT32_C( 4), -INT32_C( 4), INT32_C( 4) } },
3223 #endif
3224 { { SIMDE_FLOAT32_C( -95.52), SIMDE_FLOAT32_C( 603.57), SIMDE_FLOAT32_C( -810.91), SIMDE_FLOAT32_C( 527.98) },
3225 { -INT32_C( 96), INT32_C( 604), -INT32_C( 811), INT32_C( 528) } },
3226 { { SIMDE_FLOAT32_C( -768.18), SIMDE_FLOAT32_C( -162.82), SIMDE_FLOAT32_C( -159.43), SIMDE_FLOAT32_C( 588.60) },
3227 { -INT32_C( 768), -INT32_C( 163), -INT32_C( 159), INT32_C( 589) } },
3228 { { SIMDE_FLOAT32_C( 84.90), SIMDE_FLOAT32_C( -904.57), SIMDE_FLOAT32_C( -209.20), SIMDE_FLOAT32_C( 264.55) },
3229 { INT32_C( 85), -INT32_C( 905), -INT32_C( 209), INT32_C( 265) } },
3230 { { SIMDE_FLOAT32_C( -19.50), SIMDE_FLOAT32_C( -416.92), SIMDE_FLOAT32_C( -780.86), SIMDE_FLOAT32_C( -31.81) },
3231 { -INT32_C( 20), -INT32_C( 417), -INT32_C( 781), -INT32_C( 32) } },
3232 { { SIMDE_FLOAT32_C( -561.41), SIMDE_FLOAT32_C( -689.14), SIMDE_FLOAT32_C( 434.56), SIMDE_FLOAT32_C( 432.69) },
3233 { -INT32_C( 561), -INT32_C( 689), INT32_C( 435), INT32_C( 433) } },
3234 { { SIMDE_FLOAT32_C( 170.13), SIMDE_FLOAT32_C( 594.22), SIMDE_FLOAT32_C( -888.51), SIMDE_FLOAT32_C( 321.54) },
3235 { INT32_C( 170), INT32_C( 594), -INT32_C( 889), INT32_C( 322) } },
3236 { { SIMDE_FLOAT32_C( 660.47), SIMDE_FLOAT32_C( -124.04), SIMDE_FLOAT32_C( 493.83), SIMDE_FLOAT32_C( 250.16) },
3237 { INT32_C( 660), -INT32_C( 124), INT32_C( 494), INT32_C( 250) } },
3238 { { SIMDE_FLOAT32_C( -314.21), SIMDE_FLOAT32_C( -16.38), SIMDE_FLOAT32_C( 852.78), SIMDE_FLOAT32_C( 590.27) },
3239 { -INT32_C( 314), -INT32_C( 16), INT32_C( 853), INT32_C( 590) } },
3240 { { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0) },
3241 { -INT32_C( 2147483648), -INT32_C( 2147483648), -INT32_C( 2147483648), -INT32_C( 2147483648) } },
3242 { { SIMDE_FLOAT32_C( 2147483649.0), SIMDE_FLOAT32_C( 2147483649.0), SIMDE_FLOAT32_C( 2147483649.0), SIMDE_FLOAT32_C( 2147483649.0) },
3243 { -INT32_C( 2147483648), -INT32_C( 2147483648), -INT32_C( 2147483648), -INT32_C( 2147483648) } }
3244 };
3245
3246 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3247 simde__m128 a = simde_mm_loadu_ps(test_vec[i].a);
3248 simde__m128i r = simde_mm_cvtps_epi32(a);
3249 simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
3250 }
3251
3252 return 0;
3253 }
3254
3255 static int
test_simde_mm_cvtps_pd(SIMDE_MUNIT_TEST_ARGS)3256 test_simde_mm_cvtps_pd(SIMDE_MUNIT_TEST_ARGS) {
3257 const struct {
3258 simde__m128 a;
3259 simde__m128d r;
3260 } test_vec[8] = {
3261 { simde_mm_set_ps(SIMDE_FLOAT32_C( 295.41), SIMDE_FLOAT32_C( -909.65), SIMDE_FLOAT32_C( 156.64), SIMDE_FLOAT32_C( -802.16)),
3262 simde_mm_set_pd(SIMDE_FLOAT64_C( 156.64), SIMDE_FLOAT64_C( -802.16)) },
3263 { simde_mm_set_ps(SIMDE_FLOAT32_C( 649.83), SIMDE_FLOAT32_C( -763.68), SIMDE_FLOAT32_C( 364.80), SIMDE_FLOAT32_C( 389.19)),
3264 simde_mm_set_pd(SIMDE_FLOAT64_C( 364.80), SIMDE_FLOAT64_C( 389.19)) },
3265 { simde_mm_set_ps(SIMDE_FLOAT32_C( 269.92), SIMDE_FLOAT32_C( -207.13), SIMDE_FLOAT32_C( 538.63), SIMDE_FLOAT32_C( 487.11)),
3266 simde_mm_set_pd(SIMDE_FLOAT64_C( 538.63), SIMDE_FLOAT64_C( 487.11)) },
3267 { simde_mm_set_ps(SIMDE_FLOAT32_C( -982.29), SIMDE_FLOAT32_C( 234.64), SIMDE_FLOAT32_C( -53.82), SIMDE_FLOAT32_C( 899.43)),
3268 simde_mm_set_pd(SIMDE_FLOAT64_C( -53.82), SIMDE_FLOAT64_C( 899.43)) },
3269 { simde_mm_set_ps(SIMDE_FLOAT32_C( 945.89), SIMDE_FLOAT32_C( -98.53), SIMDE_FLOAT32_C( 1.57), SIMDE_FLOAT32_C( 49.07)),
3270 simde_mm_set_pd(SIMDE_FLOAT64_C( 1.57), SIMDE_FLOAT64_C( 49.07)) },
3271 { simde_mm_set_ps(SIMDE_FLOAT32_C( -47.73), SIMDE_FLOAT32_C( 806.42), SIMDE_FLOAT32_C( 11.76), SIMDE_FLOAT32_C( -1.19)),
3272 simde_mm_set_pd(SIMDE_FLOAT64_C( 11.76), SIMDE_FLOAT64_C( -1.19)) },
3273 { simde_mm_set_ps(SIMDE_FLOAT32_C( -961.08), SIMDE_FLOAT32_C( -192.05), SIMDE_FLOAT32_C( 553.30), SIMDE_FLOAT32_C( -994.71)),
3274 simde_mm_set_pd(SIMDE_FLOAT64_C( 553.30), SIMDE_FLOAT64_C( -994.71)) },
3275 { simde_mm_set_ps(SIMDE_FLOAT32_C( 9.30), SIMDE_FLOAT32_C( -203.20), SIMDE_FLOAT32_C( -196.20), SIMDE_FLOAT32_C( 707.05)),
3276 simde_mm_set_pd(SIMDE_FLOAT64_C( -196.20), SIMDE_FLOAT64_C( 707.05)) }
3277 };
3278
3279 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3280 simde__m128d r = simde_mm_cvtps_pd(test_vec[i].a);
3281 simde_assert_m128d_close(r, test_vec[i].r, 1);
3282 }
3283
3284 return 0;
3285 }
3286
3287 static int
test_simde_mm_cvtsd_f64(SIMDE_MUNIT_TEST_ARGS)3288 test_simde_mm_cvtsd_f64(SIMDE_MUNIT_TEST_ARGS) {
3289 const struct {
3290 simde__m128d a;
3291 simde_float64 r;
3292 } test_vec[8] = {
3293 { simde_mm_set_pd(SIMDE_FLOAT64_C( 298.96), SIMDE_FLOAT64_C( 39.67)),
3294 SIMDE_FLOAT64_C( 39.67) },
3295 { simde_mm_set_pd(SIMDE_FLOAT64_C( -98.64), SIMDE_FLOAT64_C( -641.95)),
3296 SIMDE_FLOAT64_C(-641.95) },
3297 { simde_mm_set_pd(SIMDE_FLOAT64_C( -307.30), SIMDE_FLOAT64_C( -193.04)),
3298 SIMDE_FLOAT64_C(-193.04) },
3299 { simde_mm_set_pd(SIMDE_FLOAT64_C( -648.72), SIMDE_FLOAT64_C( 830.29)),
3300 SIMDE_FLOAT64_C( 830.29) },
3301 { simde_mm_set_pd(SIMDE_FLOAT64_C( -701.20), SIMDE_FLOAT64_C( -501.79)),
3302 SIMDE_FLOAT64_C(-501.79) },
3303 { simde_mm_set_pd(SIMDE_FLOAT64_C( 755.28), SIMDE_FLOAT64_C( 648.10)),
3304 SIMDE_FLOAT64_C( 648.10) },
3305 { simde_mm_set_pd(SIMDE_FLOAT64_C( -664.63), SIMDE_FLOAT64_C( 220.54)),
3306 SIMDE_FLOAT64_C( 220.54) },
3307 { simde_mm_set_pd(SIMDE_FLOAT64_C( -762.92), SIMDE_FLOAT64_C( -101.29)),
3308 SIMDE_FLOAT64_C(-101.29) }
3309 };
3310
3311 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3312 simde_float64 r = simde_mm_cvtsd_f64(test_vec[i].a);
3313 simde_assert_equal_f64(r, test_vec[i].r, 2);
3314 }
3315
3316 return 0;
3317 }
3318
3319 static int
test_simde_mm_cvtsd_si32(SIMDE_MUNIT_TEST_ARGS)3320 test_simde_mm_cvtsd_si32 (SIMDE_MUNIT_TEST_ARGS) {
3321 static const struct {
3322 const simde_float64 a[2];
3323 const int32_t r;
3324 } test_vec[] = {
3325 #if !defined(SIMDE_FAST_NANS)
3326 { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -162.87) },
3327 INT32_MIN },
3328 { { -SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -905.13) },
3329 INT32_MIN },
3330 #endif
3331 #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3332 { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1), SIMDE_FLOAT64_C( 177.40) },
3333 INT32_MIN },
3334 { { HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100, SIMDE_FLOAT64_C( -906.88) },
3335 INT32_C( 2147483547) },
3336 { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1), SIMDE_FLOAT64_C( 676.90) },
3337 INT32_MIN },
3338 { { HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100, SIMDE_FLOAT64_C( -848.13) },
3339 -INT32_C( 2147483548) },
3340 #endif
3341 { { SIMDE_FLOAT64_C( 353.29), SIMDE_FLOAT64_C( -16.32) },
3342 INT32_C( 353) },
3343 { { SIMDE_FLOAT64_C( 477.70), SIMDE_FLOAT64_C( -131.04) },
3344 INT32_C( 478) },
3345 { { SIMDE_FLOAT64_C( -314.42), SIMDE_FLOAT64_C( -351.80) },
3346 -INT32_C( 314) },
3347 { { SIMDE_FLOAT64_C( -574.04), SIMDE_FLOAT64_C( -761.46) },
3348 -INT32_C( 574) },
3349 { { SIMDE_FLOAT64_C( -428.08), SIMDE_FLOAT64_C( 959.55) },
3350 -INT32_C( 428) },
3351 { { SIMDE_FLOAT64_C( 453.56), SIMDE_FLOAT64_C( -261.91) },
3352 INT32_C( 454) }
3353 };
3354
3355 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3356 simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
3357 int32_t r = simde_mm_cvtsd_si32(a);
3358 simde_assert_equal_i32(r, test_vec[i].r);
3359 }
3360
3361 return 0;
3362 }
3363
3364 static int
test_simde_mm_cvtsd_si64(SIMDE_MUNIT_TEST_ARGS)3365 test_simde_mm_cvtsd_si64(SIMDE_MUNIT_TEST_ARGS) {
3366 const struct {
3367 simde__m128d a;
3368 int64_t r;
3369 } test_vec[8] = {
3370 { simde_mm_set_pd(SIMDE_FLOAT64_C( 793.30), SIMDE_FLOAT64_C( -706.75)), -707 },
3371 { simde_mm_set_pd(SIMDE_FLOAT64_C( 29.13), SIMDE_FLOAT64_C( -309.00)), -309 },
3372 { simde_mm_set_pd(SIMDE_FLOAT64_C( 21.24), SIMDE_FLOAT64_C( 368.17)), 368 },
3373 { simde_mm_set_pd(SIMDE_FLOAT64_C( -595.30), SIMDE_FLOAT64_C( 351.60)), 352 },
3374 { simde_mm_set_pd(SIMDE_FLOAT64_C( -640.13), SIMDE_FLOAT64_C( -466.84)), -467 },
3375 { simde_mm_set_pd(SIMDE_FLOAT64_C( -237.20), SIMDE_FLOAT64_C( -994.72)), -995 },
3376 { simde_mm_set_pd(SIMDE_FLOAT64_C( -983.23), SIMDE_FLOAT64_C( 645.14)), 645 },
3377 { simde_mm_set_pd(SIMDE_FLOAT64_C( -89.10), SIMDE_FLOAT64_C( 585.69)), 586 }
3378 };
3379
3380 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3381 int64_t r = simde_mm_cvtsd_si64(test_vec[i].a);
3382 simde_assert_equal_i64(r, test_vec[i].r);
3383 }
3384
3385 return 0;
3386 }
3387
3388 static int
test_simde_mm_cvtsd_ss(SIMDE_MUNIT_TEST_ARGS)3389 test_simde_mm_cvtsd_ss(SIMDE_MUNIT_TEST_ARGS) {
3390 const struct {
3391 simde__m128 a;
3392 simde__m128d b;
3393 simde__m128 r;
3394 } test_vec[8] = {
3395 { simde_mm_set_ps(SIMDE_FLOAT32_C( 522.41), SIMDE_FLOAT32_C( 122.44), SIMDE_FLOAT32_C( 708.76), SIMDE_FLOAT32_C( 910.97)),
3396 simde_mm_set_pd(SIMDE_FLOAT64_C( -52.04), SIMDE_FLOAT64_C( 228.75)),
3397 simde_mm_set_ps(SIMDE_FLOAT32_C( 522.41), SIMDE_FLOAT32_C( 122.44), SIMDE_FLOAT32_C( 708.76), SIMDE_FLOAT32_C( 228.75)) },
3398 { simde_mm_set_ps(SIMDE_FLOAT32_C( -311.44), SIMDE_FLOAT32_C( 267.00), SIMDE_FLOAT32_C( 965.23), SIMDE_FLOAT32_C( -248.92)),
3399 simde_mm_set_pd(SIMDE_FLOAT64_C( -89.48), SIMDE_FLOAT64_C( 178.71)),
3400 simde_mm_set_ps(SIMDE_FLOAT32_C( -311.44), SIMDE_FLOAT32_C( 267.00), SIMDE_FLOAT32_C( 965.23), SIMDE_FLOAT32_C( 178.71)) },
3401 { simde_mm_set_ps(SIMDE_FLOAT32_C( 252.50), SIMDE_FLOAT32_C( 744.35), SIMDE_FLOAT32_C( 237.50), SIMDE_FLOAT32_C( 713.77)),
3402 simde_mm_set_pd(SIMDE_FLOAT64_C( -913.96), SIMDE_FLOAT64_C( 935.45)),
3403 simde_mm_set_ps(SIMDE_FLOAT32_C( 252.50), SIMDE_FLOAT32_C( 744.35), SIMDE_FLOAT32_C( 237.50), SIMDE_FLOAT32_C( 935.45)) },
3404 { simde_mm_set_ps(SIMDE_FLOAT32_C( 781.08), SIMDE_FLOAT32_C( -50.03), SIMDE_FLOAT32_C( -658.11), SIMDE_FLOAT32_C( 945.59)),
3405 simde_mm_set_pd(SIMDE_FLOAT64_C( -556.84), SIMDE_FLOAT64_C( 452.90)),
3406 simde_mm_set_ps(SIMDE_FLOAT32_C( 781.08), SIMDE_FLOAT32_C( -50.03), SIMDE_FLOAT32_C( -658.11), SIMDE_FLOAT32_C( 452.90)) },
3407 { simde_mm_set_ps(SIMDE_FLOAT32_C( 459.13), SIMDE_FLOAT32_C( 794.72), SIMDE_FLOAT32_C( 105.91), SIMDE_FLOAT32_C( 688.90)),
3408 simde_mm_set_pd(SIMDE_FLOAT64_C( -123.20), SIMDE_FLOAT64_C( 469.36)),
3409 simde_mm_set_ps(SIMDE_FLOAT32_C( 459.13), SIMDE_FLOAT32_C( 794.72), SIMDE_FLOAT32_C( 105.91), SIMDE_FLOAT32_C( 469.36)) },
3410 { simde_mm_set_ps(SIMDE_FLOAT32_C( -699.67), SIMDE_FLOAT32_C( 751.26), SIMDE_FLOAT32_C( 72.14), SIMDE_FLOAT32_C( -162.03)),
3411 simde_mm_set_pd(SIMDE_FLOAT64_C( 868.66), SIMDE_FLOAT64_C( 138.18)),
3412 simde_mm_set_ps(SIMDE_FLOAT32_C( -699.67), SIMDE_FLOAT32_C( 751.26), SIMDE_FLOAT32_C( 72.14), SIMDE_FLOAT32_C( 138.18)) },
3413 { simde_mm_set_ps(SIMDE_FLOAT32_C( -144.80), SIMDE_FLOAT32_C( 372.44), SIMDE_FLOAT32_C( -878.31), SIMDE_FLOAT32_C( 984.43)),
3414 simde_mm_set_pd(SIMDE_FLOAT64_C( -559.54), SIMDE_FLOAT64_C( 112.58)),
3415 simde_mm_set_ps(SIMDE_FLOAT32_C( -144.80), SIMDE_FLOAT32_C( 372.44), SIMDE_FLOAT32_C( -878.31), SIMDE_FLOAT32_C( 112.58)) },
3416 { simde_mm_set_ps(SIMDE_FLOAT32_C( -958.65), SIMDE_FLOAT32_C( 333.33), SIMDE_FLOAT32_C( -940.30), SIMDE_FLOAT32_C( 396.81)),
3417 simde_mm_set_pd(SIMDE_FLOAT64_C( 263.65), SIMDE_FLOAT64_C( 199.76)),
3418 simde_mm_set_ps(SIMDE_FLOAT32_C( -958.65), SIMDE_FLOAT32_C( 333.33), SIMDE_FLOAT32_C( -940.30), SIMDE_FLOAT32_C( 199.76)) }
3419 };
3420
3421 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3422 simde__m128 r = simde_mm_cvtsd_ss(test_vec[i].a, test_vec[i].b);
3423 simde_assert_m128_close(r, test_vec[i].r, 1);
3424 }
3425
3426 return 0;
3427 }
3428
3429 static int
test_simde_x_mm_cvtsi128_si16(SIMDE_MUNIT_TEST_ARGS)3430 test_simde_x_mm_cvtsi128_si16 (SIMDE_MUNIT_TEST_ARGS) {
3431 static const struct {
3432 const int16_t a[8];
3433 const int16_t r;
3434 } test_vec[] = {
3435 { { -INT16_C( 30955), INT16_C( 704), -INT16_C( 12934), -INT16_C( 10158), INT16_C( 23505), INT16_C( 18623), -INT16_C( 30715), INT16_C( 30631) },
3436 -INT16_C( 30955) },
3437 { { -INT16_C( 18124), -INT16_C( 9599), -INT16_C( 23005), -INT16_C( 5882), -INT16_C( 24114), INT16_C( 22410), INT16_C( 23298), INT16_C( 6106) },
3438 -INT16_C( 18124) },
3439 { { -INT16_C( 25630), INT16_C( 23577), INT16_C( 27496), INT16_C( 14645), -INT16_C( 2874), -INT16_C( 13439), INT16_C( 10620), -INT16_C( 20158) },
3440 -INT16_C( 25630) },
3441 { { -INT16_C( 15390), INT16_C( 1675), -INT16_C( 28310), INT16_C( 14575), INT16_C( 31026), INT16_C( 13455), INT16_C( 27348), -INT16_C( 18613) },
3442 -INT16_C( 15390) },
3443 { { INT16_C( 25605), INT16_C( 27923), INT16_C( 18639), -INT16_C( 27226), INT16_C( 10301), -INT16_C( 18079), -INT16_C( 23727), INT16_C( 13162) },
3444 INT16_C( 25605) },
3445 { { -INT16_C( 2713), -INT16_C( 11975), INT16_C( 10630), -INT16_C( 18423), -INT16_C( 26206), INT16_C( 30700), INT16_C( 14083), INT16_C( 2094) },
3446 -INT16_C( 2713) },
3447 { { INT16_C( 16795), INT16_C( 27253), INT16_C( 7050), -INT16_C( 14592), INT16_C( 24899), -INT16_C( 27520), -INT16_C( 5372), INT16_C( 27592) },
3448 INT16_C( 16795) },
3449 { { INT16_C( 480), INT16_C( 26428), INT16_C( 17962), -INT16_C( 13025), INT16_C( 3295), -INT16_C( 7612), INT16_C( 29251), -INT16_C( 8214) },
3450 INT16_C( 480) }
3451 };
3452
3453 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3454 simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a);
3455 int16_t r = simde_x_mm_cvtsi128_si16(a);
3456 simde_assert_equal_i16(r, test_vec[i].r);
3457 }
3458
3459 return 0;
3460 }
3461
3462 static int
test_simde_mm_cvtsi128_si32(SIMDE_MUNIT_TEST_ARGS)3463 test_simde_mm_cvtsi128_si32(SIMDE_MUNIT_TEST_ARGS) {
3464 const struct {
3465 simde__m128i a;
3466 int32_t r;
3467 } test_vec[8] = {
3468 { simde_mm_set_epi32( 26453550, -127780894, 765191664, -1527053336), -1527053336 },
3469 { simde_mm_set_epi32(-2072408746, 654549051, -1035182329, -310311602), -310311602 },
3470 { simde_mm_set_epi32(-1491944780, -848128842, 200170171, -471300206), -471300206 },
3471 { simde_mm_set_epi32(-1218501110, 680592926, -869682471, -297305797), -297305797 },
3472 { simde_mm_set_epi32(-1884581495, -571508262, -111379645, -1274133785), -1274133785 },
3473 { simde_mm_set_epi32( 486988098, 416284528, 1359642222, 197671232), 197671232 },
3474 { simde_mm_set_epi32( 296562088, -1151305617, -1413122888, -1640910233), -1640910233 },
3475 { simde_mm_set_epi32(-1262725255, -1253335394, -91416000, -1892793314), -1892793314 }
3476 };
3477
3478 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3479 int32_t r = simde_mm_cvtsi128_si32(test_vec[i].a);
3480 simde_assert_equal_i32(r, test_vec[i].r);
3481 }
3482
3483 return 0;
3484 }
3485
3486 static int
test_simde_mm_cvtsi64_si128(SIMDE_MUNIT_TEST_ARGS)3487 test_simde_mm_cvtsi64_si128(SIMDE_MUNIT_TEST_ARGS) {
3488 const struct {
3489 int64_t a;
3490 simde__m128i r;
3491 } test_vec[8] = {
3492 { INT64_C( 6168135010467220065),
3493 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 6168135010467220065)) },
3494 { INT64_C( 3895170522828645721),
3495 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 3895170522828645721)) },
3496 { INT64_C( -3378210069702593578),
3497 simde_mm_set_epi64x(INT64_C( 0), INT64_C( -3378210069702593578)) },
3498 { INT64_C( 2750396577149404222),
3499 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 2750396577149404222)) },
3500 { INT64_C( 1438311486113044813),
3501 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 1438311486113044813)) },
3502 { INT64_C( 3416877519561179684),
3503 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 3416877519561179684)) },
3504 { INT64_C( 5633937201227624265),
3505 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 5633937201227624265)) },
3506 { INT64_C( -3544191055453826903),
3507 simde_mm_set_epi64x(INT64_C( 0), INT64_C( -3544191055453826903)) }
3508 };
3509
3510 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3511 simde__m128i r = simde_mm_cvtsi64_si128(test_vec[i].a);
3512 simde_assert_m128i_i64(r, ==, test_vec[i].r);
3513 }
3514
3515 return 0;
3516 }
3517
3518 static int
test_simde_mm_cvtsi128_si64(SIMDE_MUNIT_TEST_ARGS)3519 test_simde_mm_cvtsi128_si64(SIMDE_MUNIT_TEST_ARGS) {
3520 const struct {
3521 simde__m128i a;
3522 int64_t r;
3523 } test_vec[8] = {
3524 { simde_mm_set_epi64x(INT64_C( 6773505374496819552), INT64_C( 4667650958864037640)),
3525 INT64_C( 4667650958864037640) },
3526 { simde_mm_set_epi64x(INT64_C( 1327994882711935975), INT64_C( 6055234041306631062)),
3527 INT64_C( 6055234041306631062) },
3528 { simde_mm_set_epi64x(INT64_C( 8972445642279437044), INT64_C( -4761409530754735793)),
3529 INT64_C( -4761409530754735793) },
3530 { simde_mm_set_epi64x(INT64_C( 7460890732678939925), INT64_C( 5266150742597997743)),
3531 INT64_C( 5266150742597997743) },
3532 { simde_mm_set_epi64x(INT64_C( -6075061397734634308), INT64_C( 487741331498539771)),
3533 INT64_C( 487741331498539771) },
3534 { simde_mm_set_epi64x(INT64_C( 2874947710909797095), INT64_C( 2287065406213692181)),
3535 INT64_C( 2287065406213692181) },
3536 { simde_mm_set_epi64x(INT64_C( 8598185467708417568), INT64_C( -2745610728130306920)),
3537 INT64_C( -2745610728130306920) },
3538 { simde_mm_set_epi64x(INT64_C( 6122366414867950497), INT64_C( 614503884136124395)),
3539 INT64_C( 614503884136124395) }
3540 };
3541
3542 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3543 int64_t r = simde_mm_cvtsi128_si64(test_vec[i].a);
3544 simde_assert_equal_i64(r, test_vec[i].r);
3545 }
3546
3547 return 0;
3548 }
3549
3550 static int
test_simde_x_mm_cvtsi16_si128(SIMDE_MUNIT_TEST_ARGS)3551 test_simde_x_mm_cvtsi16_si128 (SIMDE_MUNIT_TEST_ARGS) {
3552 static const struct {
3553 const int16_t a;
3554 const int16_t r[8];
3555 } test_vec[] = {
3556 { -INT16_C( 17602),
3557 { -INT16_C( 17602), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
3558 { INT16_C( 26279),
3559 { INT16_C( 26279), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
3560 { -INT16_C( 15939),
3561 { -INT16_C( 15939), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
3562 { -INT16_C( 9973),
3563 { -INT16_C( 9973), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
3564 { -INT16_C( 7532),
3565 { -INT16_C( 7532), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
3566 { INT16_C( 4549),
3567 { INT16_C( 4549), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
3568 { INT16_C( 6325),
3569 { INT16_C( 6325), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
3570 { -INT16_C( 6958),
3571 { -INT16_C( 6958), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }
3572 };
3573
3574 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3575 int16_t a = test_vec[i].a;
3576 simde__m128i r = simde_x_mm_cvtsi16_si128(a);
3577 simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r));
3578 }
3579
3580 return 0;
3581 }
3582
3583 static int
test_simde_mm_cvtsi32_sd(SIMDE_MUNIT_TEST_ARGS)3584 test_simde_mm_cvtsi32_sd(SIMDE_MUNIT_TEST_ARGS) {
3585 const struct {
3586 simde__m128d a;
3587 int b;
3588 simde__m128d r;
3589 } test_vec[8] = {
3590 { simde_mm_set_pd(SIMDE_FLOAT64_C( -601.07), SIMDE_FLOAT64_C( 516.34)),
3591 -768,
3592 simde_mm_set_pd(SIMDE_FLOAT64_C( -601.07), SIMDE_FLOAT64_C( -768.00)) },
3593 { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.83), SIMDE_FLOAT64_C( 135.41)),
3594 -383,
3595 simde_mm_set_pd(SIMDE_FLOAT64_C( -939.83), SIMDE_FLOAT64_C( -383.00)) },
3596 { simde_mm_set_pd(SIMDE_FLOAT64_C( 503.65), SIMDE_FLOAT64_C( 859.44)),
3597 872,
3598 simde_mm_set_pd(SIMDE_FLOAT64_C( 503.65), SIMDE_FLOAT64_C( 872.00)) },
3599 { simde_mm_set_pd(SIMDE_FLOAT64_C( -76.44), SIMDE_FLOAT64_C( 854.87)),
3600 613,
3601 simde_mm_set_pd(SIMDE_FLOAT64_C( -76.44), SIMDE_FLOAT64_C( 613.00)) },
3602 { simde_mm_set_pd(SIMDE_FLOAT64_C( 520.67), SIMDE_FLOAT64_C( -993.40)),
3603 197,
3604 simde_mm_set_pd(SIMDE_FLOAT64_C( 520.67), SIMDE_FLOAT64_C( 197.00)) },
3605 { simde_mm_set_pd(SIMDE_FLOAT64_C( 461.59), SIMDE_FLOAT64_C( -572.51)),
3606 -157,
3607 simde_mm_set_pd(SIMDE_FLOAT64_C( 461.59), SIMDE_FLOAT64_C( -157.00)) },
3608 { simde_mm_set_pd(SIMDE_FLOAT64_C( -113.22), SIMDE_FLOAT64_C( 791.22)),
3609 -840,
3610 simde_mm_set_pd(SIMDE_FLOAT64_C( -113.22), SIMDE_FLOAT64_C( -840.00)) },
3611 { simde_mm_set_pd(SIMDE_FLOAT64_C( 707.47), SIMDE_FLOAT64_C( 954.02)),
3612 -347,
3613 simde_mm_set_pd(SIMDE_FLOAT64_C( 707.47), SIMDE_FLOAT64_C( -347.00)) }
3614 };
3615
3616 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3617 simde__m128d r = simde_mm_cvtsi32_sd(test_vec[i].a, test_vec[i].b);
3618 simde_assert_m128d_close(r, test_vec[i].r, 1);
3619 }
3620
3621 return 0;
3622 }
3623
3624 static int
test_simde_mm_cvtsi32_si128(SIMDE_MUNIT_TEST_ARGS)3625 test_simde_mm_cvtsi32_si128(SIMDE_MUNIT_TEST_ARGS) {
3626 const struct {
3627 int32_t a;
3628 simde__m128i r;
3629 } test_vec[8] = {
3630 { 306582644, simde_mm_set_epi32(0, 0, 0, 306582644) },
3631 { -365974780, simde_mm_set_epi32(0, 0, 0, -365974780) },
3632 { -85065628, simde_mm_set_epi32(0, 0, 0, -85065628) },
3633 { 1053254834, simde_mm_set_epi32(0, 0, 0, 1053254834) },
3634 { -236294791, simde_mm_set_epi32(0, 0, 0, -236294791) },
3635 { 1341442607, simde_mm_set_epi32(0, 0, 0, 1341442607) },
3636 { 336976017, simde_mm_set_epi32(0, 0, 0, 336976017) },
3637 { 1400276059, simde_mm_set_epi32(0, 0, 0, 1400276059) }
3638 };
3639
3640 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3641 simde__m128i r = simde_mm_cvtsi32_si128(test_vec[i].a);
3642 simde_assert_m128i_i32(r, ==, test_vec[i].r);
3643 }
3644
3645 return 0;
3646 }
3647
3648 static int
test_simde_mm_cvtsi64_sd(SIMDE_MUNIT_TEST_ARGS)3649 test_simde_mm_cvtsi64_sd(SIMDE_MUNIT_TEST_ARGS) {
3650 const struct {
3651 simde__m128d a;
3652 int64_t b;
3653 simde__m128d r;
3654 } test_vec[8] = {
3655 { simde_mm_set_pd(SIMDE_FLOAT64_C( 229.24), SIMDE_FLOAT64_C( 177.04)), 637,
3656 simde_mm_set_pd(SIMDE_FLOAT64_C( 229.24), SIMDE_FLOAT64_C( 637.00)) },
3657 { simde_mm_set_pd(SIMDE_FLOAT64_C( 401.10), SIMDE_FLOAT64_C( 284.52)), -162,
3658 simde_mm_set_pd(SIMDE_FLOAT64_C( 401.10), SIMDE_FLOAT64_C( -162.00)) },
3659 { simde_mm_set_pd(SIMDE_FLOAT64_C( 499.46), SIMDE_FLOAT64_C( 321.47)), -540,
3660 simde_mm_set_pd(SIMDE_FLOAT64_C( 499.46), SIMDE_FLOAT64_C( -540.00)) },
3661 { simde_mm_set_pd(SIMDE_FLOAT64_C( -710.92), SIMDE_FLOAT64_C( 858.14)), -64,
3662 simde_mm_set_pd(SIMDE_FLOAT64_C( -710.92), SIMDE_FLOAT64_C( -64.00)) },
3663 { simde_mm_set_pd(SIMDE_FLOAT64_C( -289.27), SIMDE_FLOAT64_C( -887.54)), -238,
3664 simde_mm_set_pd(SIMDE_FLOAT64_C( -289.27), SIMDE_FLOAT64_C( -238.00)) },
3665 { simde_mm_set_pd(SIMDE_FLOAT64_C( 865.34), SIMDE_FLOAT64_C( 242.15)), 121,
3666 simde_mm_set_pd(SIMDE_FLOAT64_C( 865.34), SIMDE_FLOAT64_C( 121.00)) },
3667 { simde_mm_set_pd(SIMDE_FLOAT64_C( -376.10), SIMDE_FLOAT64_C( -965.52)), 315,
3668 simde_mm_set_pd(SIMDE_FLOAT64_C( -376.10), SIMDE_FLOAT64_C( 315.00)) },
3669 { simde_mm_set_pd(SIMDE_FLOAT64_C( 673.51), SIMDE_FLOAT64_C( -882.88)), -72,
3670 simde_mm_set_pd(SIMDE_FLOAT64_C( 673.51), SIMDE_FLOAT64_C( -72.00)) }
3671 };
3672
3673 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3674 simde__m128d r = simde_mm_cvtsi64_sd(test_vec[i].a, test_vec[i].b);
3675 simde_assert_m128d_close(r, test_vec[i].r, 1);
3676 }
3677
3678 return 0;
3679 }
3680
3681 static int
test_simde_mm_cvtss_sd(SIMDE_MUNIT_TEST_ARGS)3682 test_simde_mm_cvtss_sd(SIMDE_MUNIT_TEST_ARGS) {
3683 const struct {
3684 simde__m128d a;
3685 simde__m128 b;
3686 simde__m128d r;
3687 } test_vec[8] = {
3688 { simde_mm_set_pd(SIMDE_FLOAT64_C( 546.48), SIMDE_FLOAT64_C( 729.20)),
3689 simde_mm_set_ps(SIMDE_FLOAT32_C( 142.68), SIMDE_FLOAT32_C( -75.76), SIMDE_FLOAT32_C(-648.72), SIMDE_FLOAT32_C( 148.36)),
3690 simde_mm_set_pd(SIMDE_FLOAT64_C( 546.48), SIMDE_FLOAT64_C( 148.36)) },
3691 { simde_mm_set_pd(SIMDE_FLOAT64_C( 79.60), SIMDE_FLOAT64_C( 648.49)),
3692 simde_mm_set_ps(SIMDE_FLOAT32_C( 631.34), SIMDE_FLOAT32_C( 902.53), SIMDE_FLOAT32_C( -54.65), SIMDE_FLOAT32_C( 614.98)),
3693 simde_mm_set_pd(SIMDE_FLOAT64_C( 79.60), SIMDE_FLOAT64_C( 614.98)) },
3694 { simde_mm_set_pd(SIMDE_FLOAT64_C( 811.47), SIMDE_FLOAT64_C( -95.71)),
3695 simde_mm_set_ps(SIMDE_FLOAT32_C( 13.27), SIMDE_FLOAT32_C( 315.63), SIMDE_FLOAT32_C( 407.80), SIMDE_FLOAT32_C(-826.61)),
3696 simde_mm_set_pd(SIMDE_FLOAT64_C( 811.47), SIMDE_FLOAT64_C(-826.61)) },
3697 { simde_mm_set_pd(SIMDE_FLOAT64_C( 315.42), SIMDE_FLOAT64_C( -69.06)),
3698 simde_mm_set_ps(SIMDE_FLOAT32_C( 775.15), SIMDE_FLOAT32_C( 935.54), SIMDE_FLOAT32_C(-964.44), SIMDE_FLOAT32_C( 659.62)),
3699 simde_mm_set_pd(SIMDE_FLOAT64_C( 315.42), SIMDE_FLOAT64_C( 659.62)) },
3700 { simde_mm_set_pd(SIMDE_FLOAT64_C(-579.75), SIMDE_FLOAT64_C(-291.65)),
3701 simde_mm_set_ps(SIMDE_FLOAT32_C( 533.61), SIMDE_FLOAT32_C( 565.53), SIMDE_FLOAT32_C( -36.93), SIMDE_FLOAT32_C( 57.54)),
3702 simde_mm_set_pd(SIMDE_FLOAT64_C(-579.75), SIMDE_FLOAT64_C( 57.54)) },
3703 { simde_mm_set_pd(SIMDE_FLOAT64_C( 979.09), SIMDE_FLOAT64_C(-471.44)),
3704 simde_mm_set_ps(SIMDE_FLOAT32_C( 927.62), SIMDE_FLOAT32_C( 955.93), SIMDE_FLOAT32_C(-964.80), SIMDE_FLOAT32_C( 823.88)),
3705 simde_mm_set_pd(SIMDE_FLOAT64_C( 979.09), SIMDE_FLOAT64_C( 823.88)) },
3706 { simde_mm_set_pd(SIMDE_FLOAT64_C( 172.44), SIMDE_FLOAT64_C(-427.74)),
3707 simde_mm_set_ps(SIMDE_FLOAT32_C(-343.18), SIMDE_FLOAT32_C(-352.03), SIMDE_FLOAT32_C(-836.30), SIMDE_FLOAT32_C( -61.82)),
3708 simde_mm_set_pd(SIMDE_FLOAT64_C( 172.44), SIMDE_FLOAT64_C( -61.82)) },
3709 { simde_mm_set_pd(SIMDE_FLOAT64_C( 975.42), SIMDE_FLOAT64_C( 394.72)),
3710 simde_mm_set_ps(SIMDE_FLOAT32_C( 748.90), SIMDE_FLOAT32_C(-410.84), SIMDE_FLOAT32_C( 636.92), SIMDE_FLOAT32_C( 230.31)),
3711 simde_mm_set_pd(SIMDE_FLOAT64_C( 975.42), SIMDE_FLOAT64_C( 230.31)) }
3712 };
3713
3714 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3715 simde__m128d r = simde_mm_cvtss_sd(test_vec[i].a, test_vec[i].b);
3716 simde_assert_m128d_close(r, test_vec[i].r, 2);
3717 }
3718
3719 return 0;
3720 }
3721
3722 static int
test_simde_mm_cvttpd_epi32(SIMDE_MUNIT_TEST_ARGS)3723 test_simde_mm_cvttpd_epi32 (SIMDE_MUNIT_TEST_ARGS) {
3724 static const struct {
3725 const simde_float64 a[2];
3726 const int32_t r[4];
3727 } test_vec[] = {
3728 #if !defined(SIMDE_FAST_NANS)
3729 { { SIMDE_MATH_NAN, -SIMDE_MATH_NAN },
3730 { INT32_MIN, INT32_MIN, INT32_C( 0), INT32_C( 0) } },
3731 #endif
3732 #if !defined(SIMDE_FAST_CONVERSION_RANGE) && 0
3733 { { SIMDE_FLOAT64_C( 524.21), SIMDE_FLOAT64_C( 51.51) },
3734 { INT32_C( 524), INT32_C( 51), INT32_C( 0), INT32_C( 0) } },
3735 { { SIMDE_FLOAT64_C( 146.80), SIMDE_FLOAT64_C( -434.11) },
3736 { INT32_C( 146), -INT32_C( 434), INT32_C( 0), INT32_C( 0) } },
3737 { { SIMDE_FLOAT64_C( -150.72), SIMDE_FLOAT64_C( 743.64) },
3738 #endif
3739 { { SIMDE_FLOAT64_C( 788.74), SIMDE_FLOAT64_C( 212.17) },
3740 { INT32_C( 788), INT32_C( 212), INT32_C( 0), INT32_C( 0) } },
3741 { { SIMDE_FLOAT64_C( -172.36), SIMDE_FLOAT64_C( 455.86) },
3742 { -INT32_C( 172), INT32_C( 455), INT32_C( 0), INT32_C( 0) } },
3743 { { SIMDE_FLOAT64_C( -728.09), SIMDE_FLOAT64_C( 893.73) },
3744 { -INT32_C( 728), INT32_C( 893), INT32_C( 0), INT32_C( 0) } },
3745 { { SIMDE_FLOAT64_C( 333.21), SIMDE_FLOAT64_C( -914.29) },
3746 { INT32_C( 333), -INT32_C( 914), INT32_C( 0), INT32_C( 0) } },
3747 { { SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 701.07) },
3748 { INT32_C( 0), INT32_C( 701), INT32_C( 0), INT32_C( 0) } },
3749 { { SIMDE_FLOAT64_C( 639.75), SIMDE_FLOAT64_C( -803.13) },
3750 { INT32_C( 639), -INT32_C( 803), INT32_C( 0), INT32_C( 0) } }
3751 };
3752
3753 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3754 simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
3755 simde__m128i r = simde_mm_cvttpd_epi32(a);
3756 simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
3757 }
3758
3759 return 0;
3760 }
3761
3762 static int
3763 test_simde_mm_cvttpd_pi32 (SIMDE_MUNIT_TEST_ARGS) {
3764 static const struct {
3765 const simde_float64 a[2];
3766 const int32_t r[2];
3767 } test_vec[] = {
3768 #if !defined(SIMDE_FAST_NANS)
3769 { { SIMDE_MATH_NAN, -SIMDE_MATH_NAN },
3770 { INT32_MIN, INT32_MIN } },
3771 #endif
3772 #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3773 { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
3774 HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100) },
3775 { INT32_MIN, INT32_C( 2147483547) } },
3776 { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
3777 HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100) },
3778 { INT32_MIN, -INT32_C( 2147483548) } },
3779 #endif
3780 { { SIMDE_FLOAT64_C( 788.74), SIMDE_FLOAT64_C( 212.17) },
3781 { INT32_C( 788), INT32_C( 212) } },
3782 { { SIMDE_FLOAT64_C( -172.36), SIMDE_FLOAT64_C( 455.86) },
3783 { -INT32_C( 172), INT32_C( 455) } },
3784 { { SIMDE_FLOAT64_C( -728.09), SIMDE_FLOAT64_C( 893.73) },
3785 { -INT32_C( 728), INT32_C( 893) } },
3786 { { SIMDE_FLOAT64_C( 333.21), SIMDE_FLOAT64_C( -914.29) },
3787 { INT32_C( 333), -INT32_C( 914) } },
3788 { { SIMDE_FLOAT64_C( 0.95), SIMDE_FLOAT64_C( 701.07) },
3789 { INT32_C( 0), INT32_C( 701) } },
3790 { { SIMDE_FLOAT64_C( 639.75), SIMDE_FLOAT64_C( -803.13) },
3791 { INT32_C( 639), -INT32_C( 803) } }
3792 };
3793
3794 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3795 simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
3796 simde__m64 r = simde_mm_cvttpd_pi32(a);
3797 simde_test_x86_assert_equal_i32x2(r, simde_x_mm_load_si64(test_vec[i].r));
3798 }
3799
3800 return 0;
3801 }
3802
3803 static int
3804 test_simde_mm_cvttps_epi32 (SIMDE_MUNIT_TEST_ARGS) {
3805 static const struct {
3806 const simde_float32 a[4];
3807 const int32_t r[4];
3808 } test_vec[] = {
3809 #if !defined(SIMDE_FAST_NANS)
3810 { { SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -859.90), SIMDE_MATH_NANF, SIMDE_FLOAT32_C( -330.33) },
3811 { INT32_MIN, -INT32_C( 859), INT32_MIN, -INT32_C( 330) } },
3812 #endif
3813 #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3814 { { HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
3815 HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
3816 HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
3817 HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100) },
3818 { INT32_MIN, INT32_C( 2147483520), INT32_MIN, -INT32_C( 2147483520) } },
3819 #endif
3820 { { SIMDE_FLOAT32_C( 884.61), SIMDE_FLOAT32_C( 424.21), SIMDE_FLOAT32_C( 434.23), SIMDE_FLOAT32_C( -865.32) },
3821 { INT32_C( 884), INT32_C( 424), INT32_C( 434), -INT32_C( 865) } },
3822 { { SIMDE_FLOAT32_C( 3.17), SIMDE_FLOAT32_C( -163.40), SIMDE_FLOAT32_C( -490.56), SIMDE_FLOAT32_C( 628.48) },
3823 { INT32_C( 3), -INT32_C( 163), -INT32_C( 490), INT32_C( 628) } },
3824 { { SIMDE_FLOAT32_C( 629.16), SIMDE_FLOAT32_C( 267.90), SIMDE_FLOAT32_C( 468.27), SIMDE_FLOAT32_C( 765.29) },
3825 { INT32_C( 629), INT32_C( 267), INT32_C( 468), INT32_C( 765) } },
3826 { { SIMDE_FLOAT32_C( -532.39), SIMDE_FLOAT32_C( 448.09), SIMDE_FLOAT32_C( 543.36), SIMDE_FLOAT32_C( -643.97) },
3827 { -INT32_C( 532), INT32_C( 448), INT32_C( 543), -INT32_C( 643) } },
3828 { { SIMDE_FLOAT32_C( -958.61), SIMDE_FLOAT32_C( -434.16), SIMDE_FLOAT32_C( 958.20), SIMDE_FLOAT32_C( 749.69) },
3829 { -INT32_C( 958), -INT32_C( 434), INT32_C( 958), INT32_C( 749) } },
3830 { { SIMDE_FLOAT32_C( 379.97), SIMDE_FLOAT32_C( -697.16), SIMDE_FLOAT32_C( 790.54), SIMDE_FLOAT32_C( -387.37) },
3831 { INT32_C( 379), -INT32_C( 697), INT32_C( 790), -INT32_C( 387) } },
3832 { { SIMDE_FLOAT32_C( -785.26), SIMDE_FLOAT32_C( 403.54), SIMDE_FLOAT32_C( -475.03), SIMDE_FLOAT32_C( -577.41) },
3833 { -INT32_C( 785), INT32_C( 403), -INT32_C( 475), -INT32_C( 577) } }
3834 };
3835
3836 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3837 simde__m128 a = simde_mm_loadu_ps(test_vec[i].a);
3838 simde__m128i r = simde_mm_cvttps_epi32(a);
3839 simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
3840 }
3841
3842 return 0;
3843 }
3844
3845 static int
3846 test_simde_mm_cvttsd_si32 (SIMDE_MUNIT_TEST_ARGS) {
3847 static const struct {
3848 const simde_float64 a[2];
3849 const int32_t r;
3850 } test_vec[] = {
3851 #if !defined(SIMDE_FAST_NANS)
3852 { { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 248.78) },
3853 INT32_MIN },
3854 { { -SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 139.38) },
3855 INT32_MIN },
3856 #endif
3857 #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3858 { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
3859 SIMDE_FLOAT64_C( -850.89) },
3860 -INT32_C( 2147483648) },
3861 { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
3862 SIMDE_FLOAT64_C( -30.56) },
3863 -INT32_C( 2147483648) },
3864 { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
3865 SIMDE_FLOAT64_C( -742.09) },
3866 INT32_C( 2147483547) },
3867 { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100),
3868 SIMDE_FLOAT64_C( -496.27) },
3869 -INT32_C( 2147483548) },
3870 #endif
3871 { { SIMDE_FLOAT64_C( -57.42), SIMDE_FLOAT64_C( -705.99) },
3872 -INT32_C( 57) },
3873 { { SIMDE_FLOAT64_C( 737.15), SIMDE_FLOAT64_C( -394.42) },
3874 INT32_C( 737) },
3875 { { SIMDE_FLOAT64_C( -193.78), SIMDE_FLOAT64_C( 0.85) },
3876 -INT32_C( 193) },
3877 { { SIMDE_FLOAT64_C( -61.02), SIMDE_FLOAT64_C( 247.60) },
3878 -INT32_C( 61) },
3879 { { SIMDE_FLOAT64_C( 396.64), SIMDE_FLOAT64_C( 103.10) },
3880 INT32_C( 396) },
3881 { { SIMDE_FLOAT64_C( 606.36), SIMDE_FLOAT64_C( -703.92) },
3882 INT32_C( 606) },
3883 };
3884
3885 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3886 simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
3887 int32_t r = simde_mm_cvttsd_si32(a);
3888 simde_assert_equal_i32(r, test_vec[i].r);
3889 }
3890
3891 return 0;
3892 }
3893
3894
3895 static int
3896 test_simde_mm_cvttsd_si64(SIMDE_MUNIT_TEST_ARGS) {
3897 const struct {
3898 simde__m128d a;
3899 int64_t r;
3900 } test_vec[8] = {
3901 { simde_mm_set_pd(SIMDE_FLOAT64_C(-345.97), SIMDE_FLOAT64_C( 664.87)), 664 },
3902 { simde_mm_set_pd(SIMDE_FLOAT64_C( 648.27), SIMDE_FLOAT64_C(-390.19)), -390 },
3903 { simde_mm_set_pd(SIMDE_FLOAT64_C(-500.63), SIMDE_FLOAT64_C(-258.15)), -258 },
3904 { simde_mm_set_pd(SIMDE_FLOAT64_C(-109.22), SIMDE_FLOAT64_C(-784.27)), -784 },
3905 { simde_mm_set_pd(SIMDE_FLOAT64_C(-688.79), SIMDE_FLOAT64_C(-698.22)), -698 },
3906 { simde_mm_set_pd(SIMDE_FLOAT64_C(-914.25), SIMDE_FLOAT64_C(-650.88)), -650 },
3907 { simde_mm_set_pd(SIMDE_FLOAT64_C(-228.35), SIMDE_FLOAT64_C(-603.46)), -603 },
3908 { simde_mm_set_pd(SIMDE_FLOAT64_C(-556.94), SIMDE_FLOAT64_C( 694.64)), 694 }
3909 };
3910
3911 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3912 int64_t r = simde_mm_cvttsd_si64(test_vec[i].a);
3913 simde_assert_equal_i64(r, test_vec[i].r);
3914 }
3915
3916 return 0;
3917 }
3918
3919 static int
3920 test_simde_mm_div_pd(SIMDE_MUNIT_TEST_ARGS) {
3921 const struct {
3922 simde__m128d a;
3923 simde__m128d b;
3924 simde__m128d r;
3925 } test_vec[8] = {
3926 { simde_mm_set_pd(SIMDE_FLOAT64_C( -184.40), SIMDE_FLOAT64_C( 992.45)),
3927 simde_mm_set_pd(SIMDE_FLOAT64_C( 155.72), SIMDE_FLOAT64_C( 856.52)),
3928 simde_mm_set_pd(SIMDE_FLOAT64_C( -1.18), SIMDE_FLOAT64_C( 1.16)) },
3929 { simde_mm_set_pd(SIMDE_FLOAT64_C( -34.37), SIMDE_FLOAT64_C( 596.67)),
3930 simde_mm_set_pd(SIMDE_FLOAT64_C( -718.99), SIMDE_FLOAT64_C( -17.98)),
3931 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.05), SIMDE_FLOAT64_C( -33.19)) },
3932 { simde_mm_set_pd(SIMDE_FLOAT64_C( 797.64), SIMDE_FLOAT64_C( 669.98)),
3933 simde_mm_set_pd(SIMDE_FLOAT64_C( -872.55), SIMDE_FLOAT64_C( 857.06)),
3934 simde_mm_set_pd(SIMDE_FLOAT64_C( -0.91), SIMDE_FLOAT64_C( 0.78)) },
3935 { simde_mm_set_pd(SIMDE_FLOAT64_C( -864.62), SIMDE_FLOAT64_C( 635.60)),
3936 simde_mm_set_pd(SIMDE_FLOAT64_C( -556.59), SIMDE_FLOAT64_C( 676.91)),
3937 simde_mm_set_pd(SIMDE_FLOAT64_C( 1.55), SIMDE_FLOAT64_C( 0.94)) },
3938 { simde_mm_set_pd(SIMDE_FLOAT64_C( -635.32), SIMDE_FLOAT64_C( 518.94)),
3939 simde_mm_set_pd(SIMDE_FLOAT64_C( -426.58), SIMDE_FLOAT64_C( -331.30)),
3940 simde_mm_set_pd(SIMDE_FLOAT64_C( 1.49), SIMDE_FLOAT64_C( -1.57)) },
3941 { simde_mm_set_pd(SIMDE_FLOAT64_C( -494.90), SIMDE_FLOAT64_C( -42.04)),
3942 simde_mm_set_pd(SIMDE_FLOAT64_C( 432.98), SIMDE_FLOAT64_C( 277.38)),
3943 simde_mm_set_pd(SIMDE_FLOAT64_C( -1.14), SIMDE_FLOAT64_C( -0.15)) },
3944 { simde_mm_set_pd(SIMDE_FLOAT64_C( -530.41), SIMDE_FLOAT64_C( 112.07)),
3945 simde_mm_set_pd(SIMDE_FLOAT64_C( -914.02), SIMDE_FLOAT64_C( 90.15)),
3946 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.58), SIMDE_FLOAT64_C( 1.24)) },
3947 { simde_mm_set_pd(SIMDE_FLOAT64_C( -176.73), SIMDE_FLOAT64_C( -245.01)),
3948 simde_mm_set_pd(SIMDE_FLOAT64_C( 315.38), SIMDE_FLOAT64_C( -747.83)),
3949 simde_mm_set_pd(SIMDE_FLOAT64_C( -0.56), SIMDE_FLOAT64_C( 0.33)) }
3950 };
3951
3952 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3953 simde__m128d r = simde_mm_div_pd(test_vec[i].a, test_vec[i].b);
3954 simde_assert_m128d_close(r, test_vec[i].r, 1);
3955 }
3956
3957 return 0;
3958 }
3959
3960 static int
3961 test_simde_mm_div_sd(SIMDE_MUNIT_TEST_ARGS) {
3962 const struct {
3963 simde__m128d a;
3964 simde__m128d b;
3965 simde__m128d r;
3966 } test_vec[8] = {
3967 { simde_mm_set_pd(SIMDE_FLOAT64_C( -164.12), SIMDE_FLOAT64_C( -192.56)),
3968 simde_mm_set_pd(SIMDE_FLOAT64_C( -917.87), SIMDE_FLOAT64_C( 429.05)),
3969 simde_mm_set_pd(SIMDE_FLOAT64_C( -164.12), SIMDE_FLOAT64_C( -0.45)) },
3970 { simde_mm_set_pd(SIMDE_FLOAT64_C( 658.48), SIMDE_FLOAT64_C( 164.94)),
3971 simde_mm_set_pd(SIMDE_FLOAT64_C( -29.79), SIMDE_FLOAT64_C( 356.73)),
3972 simde_mm_set_pd(SIMDE_FLOAT64_C( 658.48), SIMDE_FLOAT64_C( 0.46)) },
3973 { simde_mm_set_pd(SIMDE_FLOAT64_C( -450.41), SIMDE_FLOAT64_C( 587.65)),
3974 simde_mm_set_pd(SIMDE_FLOAT64_C( 553.54), SIMDE_FLOAT64_C( -684.22)),
3975 simde_mm_set_pd(SIMDE_FLOAT64_C( -450.41), SIMDE_FLOAT64_C( -0.86)) },
3976 { simde_mm_set_pd(SIMDE_FLOAT64_C( -742.91), SIMDE_FLOAT64_C( -966.41)),
3977 simde_mm_set_pd(SIMDE_FLOAT64_C( 180.37), SIMDE_FLOAT64_C( 175.93)),
3978 simde_mm_set_pd(SIMDE_FLOAT64_C( -742.91), SIMDE_FLOAT64_C( -5.49)) },
3979 { simde_mm_set_pd(SIMDE_FLOAT64_C( -280.67), SIMDE_FLOAT64_C( 415.67)),
3980 simde_mm_set_pd(SIMDE_FLOAT64_C( 495.86), SIMDE_FLOAT64_C( -819.23)),
3981 simde_mm_set_pd(SIMDE_FLOAT64_C( -280.67), SIMDE_FLOAT64_C( -0.51)) },
3982 { simde_mm_set_pd(SIMDE_FLOAT64_C( 391.69), SIMDE_FLOAT64_C( 589.87)),
3983 simde_mm_set_pd(SIMDE_FLOAT64_C( -651.02), SIMDE_FLOAT64_C( -239.35)),
3984 simde_mm_set_pd(SIMDE_FLOAT64_C( 391.69), SIMDE_FLOAT64_C( -2.46)) },
3985 { simde_mm_set_pd(SIMDE_FLOAT64_C( 423.52), SIMDE_FLOAT64_C( 419.28)),
3986 simde_mm_set_pd(SIMDE_FLOAT64_C( 336.50), SIMDE_FLOAT64_C( 431.02)),
3987 simde_mm_set_pd(SIMDE_FLOAT64_C( 423.52), SIMDE_FLOAT64_C( 0.97)) },
3988 { simde_mm_set_pd(SIMDE_FLOAT64_C( 235.17), SIMDE_FLOAT64_C( -333.81)),
3989 simde_mm_set_pd(SIMDE_FLOAT64_C( -715.21), SIMDE_FLOAT64_C( 4.82)),
3990 simde_mm_set_pd(SIMDE_FLOAT64_C( 235.17), SIMDE_FLOAT64_C( -69.26)) }
3991 };
3992
3993 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3994 simde__m128d r = simde_mm_div_sd(test_vec[i].a, test_vec[i].b);
3995 simde_assert_m128d_close(r, test_vec[i].r, 1);
3996 }
3997
3998 return 0;
3999 }
4000
4001 static int
4002 test_simde_mm_extract_epi16(SIMDE_MUNIT_TEST_ARGS) {
4003 const struct {
4004 simde__m128i a;
4005 int32_t r;
4006 } test_vec[8] = {
4007 { simde_mm_set_epi16(INT16_C(-22888), INT16_C(-26241), INT16_C( 16094), INT16_C( 25365),
4008 INT16_C(-10975), INT16_C( -4323), INT16_C( 9478), INT16_C(-18966)), 54561 },
4009 { simde_mm_set_epi16(INT16_C( -4494), INT16_C(-23544), INT16_C( 12313), INT16_C( 19220),
4010 INT16_C( 16921), INT16_C( 9248), INT16_C( -1076), INT16_C(-18617)), 16921 },
4011 { simde_mm_set_epi16(INT16_C( 5051), INT16_C( 30913), INT16_C( 18404), INT16_C(-11820),
4012 INT16_C( 16495), INT16_C( 32647), INT16_C( 21150), INT16_C( 16664)), 16495 },
4013 { simde_mm_set_epi16(INT16_C( 987), INT16_C( 32176), INT16_C(-17758), INT16_C( 21096),
4014 INT16_C( -945), INT16_C( 5537), INT16_C( 5495), INT16_C(-18130)), 64591 },
4015 { simde_mm_set_epi16(INT16_C(-16046), INT16_C( 13714), INT16_C( 12272), INT16_C( 32151),
4016 INT16_C(-14156), INT16_C( 8851), INT16_C(-19624), INT16_C( -2653)), 51380 },
4017 { simde_mm_set_epi16(INT16_C(-28172), INT16_C( 1666), INT16_C( 15569), INT16_C( -1622),
4018 INT16_C( 22048), INT16_C(-24364), INT16_C( 2478), INT16_C( 20826)), 22048 },
4019 { simde_mm_set_epi16(INT16_C(-29653), INT16_C(-27750), INT16_C( 5027), INT16_C( -7816),
4020 INT16_C(-20852), INT16_C( 3178), INT16_C(-27881), INT16_C( 3156)), 44684 },
4021 { simde_mm_set_epi16(INT16_C(-26280), INT16_C( 27067), INT16_C( 10815), INT16_C(-30178),
4022 INT16_C(-26852), INT16_C( 26399), INT16_C(-30202), INT16_C(-11030)), 38684 }
4023 };
4024
4025 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4026 int32_t r = simde_mm_extract_epi16(test_vec[i].a, 3);
4027 simde_assert_equal_i32(r, test_vec[i].r);
4028 }
4029
4030 return 0;
4031 }
4032
4033 static int
4034 test_simde_mm_insert_epi16(SIMDE_MUNIT_TEST_ARGS) {
4035 const struct {
4036 simde__m128i a;
4037 int16_t b;
4038 simde__m128i r;
4039 } test_vec[8] = {
4040 { simde_mm_set_epi16(INT16_C(-18659), INT16_C( 19491), INT16_C(-30434), INT16_C( -6059),
4041 INT16_C( 11985), INT16_C( 5369), INT16_C(-14188), INT16_C( 9668)),
4042 21712,
4043 simde_mm_set_epi16(INT16_C(-18659), INT16_C( 19491), INT16_C(-30434), INT16_C( -6059),
4044 INT16_C( 21712), INT16_C( 5369), INT16_C(-14188), INT16_C( 9668)) },
4045 { simde_mm_set_epi16(INT16_C( 32684), INT16_C(-21716), INT16_C( 7657), INT16_C( 3627),
4046 INT16_C( 12377), INT16_C( 30609), INT16_C(-12611), INT16_C(-11955)),
4047 -27473,
4048 simde_mm_set_epi16(INT16_C( 32684), INT16_C(-21716), INT16_C( 7657), INT16_C( 3627),
4049 INT16_C(-27473), INT16_C( 30609), INT16_C(-12611), INT16_C(-11955)) },
4050 { simde_mm_set_epi16(INT16_C(-18344), INT16_C( -4896), INT16_C(-19094), INT16_C( -638),
4051 INT16_C(-30376), INT16_C(-17556), INT16_C(-31358), INT16_C(-17530)),
4052 -19116,
4053 simde_mm_set_epi16(INT16_C(-18344), INT16_C( -4896), INT16_C(-19094), INT16_C( -638),
4054 INT16_C(-19116), INT16_C(-17556), INT16_C(-31358), INT16_C(-17530)) },
4055 { simde_mm_set_epi16(INT16_C(-11121), INT16_C( 29288), INT16_C( -3915), INT16_C( 13306),
4056 INT16_C( 30582), INT16_C( 4374), INT16_C( -9323), INT16_C( -2317)),
4057 5778,
4058 simde_mm_set_epi16(INT16_C(-11121), INT16_C( 29288), INT16_C( -3915), INT16_C( 13306),
4059 INT16_C( 5778), INT16_C( 4374), INT16_C( -9323), INT16_C( -2317)) },
4060 { simde_mm_set_epi16(INT16_C( 7542), INT16_C(-16196), INT16_C(-24612), INT16_C( 8929),
4061 INT16_C(-16460), INT16_C( 17259), INT16_C( 672), INT16_C(-18076)),
4062 -411,
4063 simde_mm_set_epi16(INT16_C( 7542), INT16_C(-16196), INT16_C(-24612), INT16_C( 8929),
4064 INT16_C( -411), INT16_C( 17259), INT16_C( 672), INT16_C(-18076)) },
4065 { simde_mm_set_epi16(INT16_C( 15913), INT16_C(-18873), INT16_C( 14978), INT16_C( 31946),
4066 INT16_C( -6939), INT16_C( 26150), INT16_C( 18499), INT16_C(-16752)),
4067 1141,
4068 simde_mm_set_epi16(INT16_C( 15913), INT16_C(-18873), INT16_C( 14978), INT16_C( 31946),
4069 INT16_C( 1141), INT16_C( 26150), INT16_C( 18499), INT16_C(-16752)) },
4070 { simde_mm_set_epi16(INT16_C( 1093), INT16_C( -6101), INT16_C(-30747), INT16_C(-18266),
4071 INT16_C( 4085), INT16_C(-14478), INT16_C( -6279), INT16_C(-25531)),
4072 -18605,
4073 simde_mm_set_epi16(INT16_C( 1093), INT16_C( -6101), INT16_C(-30747), INT16_C(-18266),
4074 INT16_C(-18605), INT16_C(-14478), INT16_C( -6279), INT16_C(-25531)) },
4075 { simde_mm_set_epi16(INT16_C( -2463), INT16_C( -3389), INT16_C( 28311), INT16_C( -5667),
4076 INT16_C( 24886), INT16_C( 24368), INT16_C( 19484), INT16_C(-11581)),
4077 -17420,
4078 simde_mm_set_epi16(INT16_C( -2463), INT16_C( -3389), INT16_C( 28311), INT16_C( -5667),
4079 INT16_C(-17420), INT16_C( 24368), INT16_C( 19484), INT16_C(-11581)) }
4080 };
4081
4082 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4083 simde__m128i r = simde_mm_insert_epi16(test_vec[i].a, test_vec[i].b, 3);
4084 simde_assert_m128i_i16(r, ==, test_vec[i].r);
4085 }
4086
4087 return 0;
4088 }
4089
4090 static int
4091 test_simde_mm_load_pd (SIMDE_MUNIT_TEST_ARGS) {
4092 static const struct {
4093 const SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 a[2];
4094 const simde_float64 r[2];
4095 } test_vec[] = {
4096 { { SIMDE_FLOAT64_C( 319.94), SIMDE_FLOAT64_C( 593.75) },
4097 { SIMDE_FLOAT64_C( 319.94), SIMDE_FLOAT64_C( 593.75) } },
4098 { { SIMDE_FLOAT64_C( -220.38), SIMDE_FLOAT64_C( 646.62) },
4099 { SIMDE_FLOAT64_C( -220.38), SIMDE_FLOAT64_C( 646.62) } },
4100 { { SIMDE_FLOAT64_C( 769.82), SIMDE_FLOAT64_C( 960.64) },
4101 { SIMDE_FLOAT64_C( 769.82), SIMDE_FLOAT64_C( 960.64) } },
4102 { { SIMDE_FLOAT64_C( -283.67), SIMDE_FLOAT64_C( -795.53) },
4103 { SIMDE_FLOAT64_C( -283.67), SIMDE_FLOAT64_C( -795.53) } },
4104 { { SIMDE_FLOAT64_C( -643.22), SIMDE_FLOAT64_C( 246.89) },
4105 { SIMDE_FLOAT64_C( -643.22), SIMDE_FLOAT64_C( 246.89) } },
4106 { { SIMDE_FLOAT64_C( -842.54), SIMDE_FLOAT64_C( -513.83) },
4107 { SIMDE_FLOAT64_C( -842.54), SIMDE_FLOAT64_C( -513.83) } },
4108 { { SIMDE_FLOAT64_C( 635.37), SIMDE_FLOAT64_C( 836.97) },
4109 { SIMDE_FLOAT64_C( 635.37), SIMDE_FLOAT64_C( 836.97) } },
4110 { { SIMDE_FLOAT64_C( 838.72), SIMDE_FLOAT64_C( -197.92) },
4111 { SIMDE_FLOAT64_C( 838.72), SIMDE_FLOAT64_C( -197.92) } }
4112 };
4113
4114 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
4115 simde_test_x86_assert_equal_f64x2(simde_mm_load_pd(test_vec[i].r), simde_mm_loadu_pd(test_vec[i].r), 1);
4116 }
4117
4118 return 0;
4119 }
4120
4121 static int
4122 test_simde_mm_load_pd1(SIMDE_MUNIT_TEST_ARGS) {
4123 const struct {
4124 simde_float64 a;
4125 simde__m128d r;
4126 } test_vec[8] = {
4127 {SIMDE_FLOAT64_C( -639.28), simde_mm_set_pd(SIMDE_FLOAT64_C(-639.28), SIMDE_FLOAT64_C(-639.28)) },
4128 {SIMDE_FLOAT64_C( 754.31), simde_mm_set_pd(SIMDE_FLOAT64_C( 754.31), SIMDE_FLOAT64_C( 754.31)) },
4129 {SIMDE_FLOAT64_C( -143.09), simde_mm_set_pd(SIMDE_FLOAT64_C(-143.09), SIMDE_FLOAT64_C(-143.09)) },
4130 {SIMDE_FLOAT64_C( -509.95), simde_mm_set_pd(SIMDE_FLOAT64_C(-509.95), SIMDE_FLOAT64_C(-509.95)) },
4131 {SIMDE_FLOAT64_C( 357.11), simde_mm_set_pd(SIMDE_FLOAT64_C( 357.11), SIMDE_FLOAT64_C( 357.11)) },
4132 {SIMDE_FLOAT64_C( 414.83), simde_mm_set_pd(SIMDE_FLOAT64_C( 414.83), SIMDE_FLOAT64_C( 414.83)) },
4133 {SIMDE_FLOAT64_C( 416.46), simde_mm_set_pd(SIMDE_FLOAT64_C( 416.46), SIMDE_FLOAT64_C( 416.46)) },
4134 {SIMDE_FLOAT64_C( 167.42), simde_mm_set_pd(SIMDE_FLOAT64_C( 167.42), SIMDE_FLOAT64_C( 167.42)) }
4135 };
4136
4137 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4138 simde__m128d r = simde_mm_load_pd1(&test_vec[i].a);
4139 simde_assert_m128d_close(r, test_vec[i].r, 1);
4140 }
4141
4142 return 0;
4143 }
4144
4145 static int
4146 test_simde_mm_load_sd(SIMDE_MUNIT_TEST_ARGS) {
4147 const struct {
4148 simde_float64 a;
4149 simde__m128d r;
4150 } test_vec[8] = {
4151 {SIMDE_FLOAT64_C( 883.59), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 883.59)) },
4152 {SIMDE_FLOAT64_C( 719.08), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 719.08)) },
4153 {SIMDE_FLOAT64_C( -82.94), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -82.94)) },
4154 {SIMDE_FLOAT64_C( -87.79), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -87.79)) },
4155 {SIMDE_FLOAT64_C( 309.31), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 309.31)) },
4156 {SIMDE_FLOAT64_C( -987.67), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C(-987.67)) },
4157 {SIMDE_FLOAT64_C( 196.18), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 196.18)) },
4158 {SIMDE_FLOAT64_C( 313.82), simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 313.82)) }
4159 };
4160
4161 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4162 simde__m128d r = simde_mm_load_sd(&test_vec[i].a);
4163 simde_assert_m128d_close(r, test_vec[i].r, 1);
4164 }
4165
4166 return 0;
4167 }
4168
4169 static int
4170 test_simde_mm_load_si128(SIMDE_MUNIT_TEST_ARGS) {
4171 const struct {
4172 simde__m128i a;
4173 simde__m128i r;
4174 } test_vec[8] = {
4175 { simde_mm_set_epi32(INT32_C(-1485513264), INT32_C( 2130924320), INT32_C( 1226074611), INT32_C( -306486659)),
4176 simde_mm_set_epi32(INT32_C(-1485513264), INT32_C( 2130924320), INT32_C( 1226074611), INT32_C( -306486659)) },
4177 { simde_mm_set_epi32(INT32_C( 952258085), INT32_C( -534886765), INT32_C( -354984724), INT32_C(-1450427500)),
4178 simde_mm_set_epi32(INT32_C( 952258085), INT32_C( -534886765), INT32_C( -354984724), INT32_C(-1450427500)) },
4179 { simde_mm_set_epi32(INT32_C(-1417585996), INT32_C( 546041970), INT32_C(-1469146664), INT32_C(-2062567602)),
4180 simde_mm_set_epi32(INT32_C(-1417585996), INT32_C( 546041970), INT32_C(-1469146664), INT32_C(-2062567602)) },
4181 { simde_mm_set_epi32(INT32_C(-1482230799), INT32_C(-1421432180), INT32_C(-1588201284), INT32_C(-1267673212)),
4182 simde_mm_set_epi32(INT32_C(-1482230799), INT32_C(-1421432180), INT32_C(-1588201284), INT32_C(-1267673212)) },
4183 { simde_mm_set_epi32(INT32_C(-1153380991), INT32_C( 1838203743), INT32_C( -180063833), INT32_C( -699223421)),
4184 simde_mm_set_epi32(INT32_C(-1153380991), INT32_C( 1838203743), INT32_C( -180063833), INT32_C( -699223421)) },
4185 { simde_mm_set_epi32(INT32_C( -711752348), INT32_C( 464328511), INT32_C( 1773807699), INT32_C( 849844772)),
4186 simde_mm_set_epi32(INT32_C( -711752348), INT32_C( 464328511), INT32_C( 1773807699), INT32_C( 849844772)) },
4187 { simde_mm_set_epi32(INT32_C(-1083662155), INT32_C( -641783129), INT32_C(-1893537704), INT32_C( 1971283674)),
4188 simde_mm_set_epi32(INT32_C(-1083662155), INT32_C( -641783129), INT32_C(-1893537704), INT32_C( 1971283674)) },
4189 { simde_mm_set_epi32(INT32_C(-1329936037), INT32_C( -364329957), INT32_C(-1886427840), INT32_C(-1935682760)),
4190 simde_mm_set_epi32(INT32_C(-1329936037), INT32_C( -364329957), INT32_C(-1886427840), INT32_C(-1935682760)) }
4191 };
4192
4193 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4194 simde__m128i r = simde_mm_load_si128(&test_vec[i].a);
4195 simde_assert_m128i_i32(r, ==, test_vec[i].r);
4196 }
4197
4198 return 0;
4199 }
4200
4201 static int
4202 test_simde_mm_loadh_pd(SIMDE_MUNIT_TEST_ARGS) {
4203 const struct {
4204 simde__m128d a;
4205 simde_float64 b;
4206 simde__m128d r;
4207 } test_vec[8] = {
4208 { simde_mm_set_pd(SIMDE_FLOAT64_C( -625.39), SIMDE_FLOAT64_C( -212.79)),
4209 SIMDE_FLOAT64_C( -544.03),
4210 simde_mm_set_pd(SIMDE_FLOAT64_C( -544.03), SIMDE_FLOAT64_C( -212.79)) },
4211 { simde_mm_set_pd(SIMDE_FLOAT64_C( 906.58), SIMDE_FLOAT64_C( -446.43)),
4212 SIMDE_FLOAT64_C( -955.62),
4213 simde_mm_set_pd(SIMDE_FLOAT64_C( -955.62), SIMDE_FLOAT64_C( -446.43)) },
4214 { simde_mm_set_pd(SIMDE_FLOAT64_C( 575.38), SIMDE_FLOAT64_C( -468.21)),
4215 SIMDE_FLOAT64_C( -790.22),
4216 simde_mm_set_pd(SIMDE_FLOAT64_C( -790.22), SIMDE_FLOAT64_C( -468.21)) },
4217 { simde_mm_set_pd(SIMDE_FLOAT64_C( 375.10), SIMDE_FLOAT64_C( -731.74)),
4218 SIMDE_FLOAT64_C( 857.52),
4219 simde_mm_set_pd(SIMDE_FLOAT64_C( 857.52), SIMDE_FLOAT64_C( -731.74)) },
4220 { simde_mm_set_pd(SIMDE_FLOAT64_C( -48.07), SIMDE_FLOAT64_C( -201.78)),
4221 SIMDE_FLOAT64_C( -122.99),
4222 simde_mm_set_pd(SIMDE_FLOAT64_C( -122.99), SIMDE_FLOAT64_C( -201.78)) },
4223 { simde_mm_set_pd(SIMDE_FLOAT64_C( 97.66), SIMDE_FLOAT64_C( -743.76)),
4224 SIMDE_FLOAT64_C( 123.61),
4225 simde_mm_set_pd(SIMDE_FLOAT64_C( 123.61), SIMDE_FLOAT64_C( -743.76)) },
4226 { simde_mm_set_pd(SIMDE_FLOAT64_C( 622.43), SIMDE_FLOAT64_C( -815.78)),
4227 SIMDE_FLOAT64_C( -884.62),
4228 simde_mm_set_pd(SIMDE_FLOAT64_C( -884.62), SIMDE_FLOAT64_C( -815.78)) },
4229 { simde_mm_set_pd(SIMDE_FLOAT64_C( 233.02), SIMDE_FLOAT64_C( 337.90)),
4230 SIMDE_FLOAT64_C( 566.08),
4231 simde_mm_set_pd(SIMDE_FLOAT64_C( 566.08), SIMDE_FLOAT64_C( 337.90)) }
4232 };
4233
4234 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4235 simde__m128d r = simde_mm_loadh_pd(test_vec[i].a, &test_vec[i].b);
4236 simde_assert_m128d_close(r, test_vec[i].r, 4);
4237 }
4238
4239 return 0;
4240 }
4241
4242 static int
4243 test_simde_mm_loadl_epi64 (SIMDE_MUNIT_TEST_ARGS) {
4244 static const struct {
4245 const int64_t a;
4246 const int64_t r[2];
4247 } test_vec[] = {
4248 { -INT64_C( 5374209034103506743),
4249 { -INT64_C( 5374209034103506743), INT64_C(0) } },
4250 { -INT64_C( 8818261387786582106),
4251 { -INT64_C( 8818261387786582106), INT64_C(0) } },
4252 { INT64_C( 8778417490344874118),
4253 { INT64_C( 8778417490344874118), INT64_C(0) } },
4254 { INT64_C( 1040805703196854697),
4255 { INT64_C( 1040805703196854697), INT64_C(0) } },
4256 { -INT64_C( 6883770744639848089),
4257 { -INT64_C( 6883770744639848089), INT64_C(0) } },
4258 { -INT64_C( 6091281060752135947),
4259 { -INT64_C( 6091281060752135947), INT64_C(0) } },
4260 { INT64_C( 7649374694561713533),
4261 { INT64_C( 7649374694561713533), INT64_C(0) } },
4262 { INT64_C( 9018079017176557522),
4263 { INT64_C( 9018079017176557522), INT64_C(0) } }
4264 };
4265
4266 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
4267 simde__m128i r = simde_mm_loadl_epi64(SIMDE_ALIGN_CAST(simde__m128i const*, &test_vec[i].a));
4268 simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r));
4269 }
4270
4271 return 0;
4272 }
4273
4274 static int
4275 test_simde_mm_loadl_pd(SIMDE_MUNIT_TEST_ARGS) {
4276 const struct {
4277 simde__m128d a;
4278 simde_float64 b;
4279 simde__m128d r;
4280 } test_vec[8] = {
4281 { simde_mm_set_pd(SIMDE_FLOAT64_C( 398.25), SIMDE_FLOAT64_C( 169.44)),
4282 SIMDE_FLOAT64_C( 512.14),
4283 simde_mm_set_pd(SIMDE_FLOAT64_C( 398.25), SIMDE_FLOAT64_C( 512.14)) },
4284 { simde_mm_set_pd(SIMDE_FLOAT64_C( 747.05), SIMDE_FLOAT64_C( 122.36)),
4285 SIMDE_FLOAT64_C( -219.24),
4286 simde_mm_set_pd(SIMDE_FLOAT64_C( 747.05), SIMDE_FLOAT64_C( -219.24)) },
4287 { simde_mm_set_pd(SIMDE_FLOAT64_C( 806.68), SIMDE_FLOAT64_C( 439.45)),
4288 SIMDE_FLOAT64_C( 545.31),
4289 simde_mm_set_pd(SIMDE_FLOAT64_C( 806.68), SIMDE_FLOAT64_C( 545.31)) },
4290 { simde_mm_set_pd(SIMDE_FLOAT64_C( -494.45), SIMDE_FLOAT64_C( 273.54)),
4291 SIMDE_FLOAT64_C( 233.72),
4292 simde_mm_set_pd(SIMDE_FLOAT64_C( -494.45), SIMDE_FLOAT64_C( 233.72)) },
4293 { simde_mm_set_pd(SIMDE_FLOAT64_C( -435.22), SIMDE_FLOAT64_C( -790.14)),
4294 SIMDE_FLOAT64_C( 334.56),
4295 simde_mm_set_pd(SIMDE_FLOAT64_C( -435.22), SIMDE_FLOAT64_C( 334.56)) },
4296 { simde_mm_set_pd(SIMDE_FLOAT64_C( -387.92), SIMDE_FLOAT64_C( 587.13)),
4297 SIMDE_FLOAT64_C( 782.99),
4298 simde_mm_set_pd(SIMDE_FLOAT64_C( -387.92), SIMDE_FLOAT64_C( 782.99)) },
4299 { simde_mm_set_pd(SIMDE_FLOAT64_C( -656.10), SIMDE_FLOAT64_C( -868.90)),
4300 SIMDE_FLOAT64_C( -241.17),
4301 simde_mm_set_pd(SIMDE_FLOAT64_C( -656.10), SIMDE_FLOAT64_C( -241.17)) },
4302 { simde_mm_set_pd(SIMDE_FLOAT64_C( 155.11), SIMDE_FLOAT64_C( -412.38)),
4303 SIMDE_FLOAT64_C( 606.64),
4304 simde_mm_set_pd(SIMDE_FLOAT64_C( 155.11), SIMDE_FLOAT64_C( 606.64)) }
4305 };
4306
4307 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4308 simde__m128d r = simde_mm_loadl_pd(test_vec[i].a, &test_vec[i].b);
4309 simde_assert_m128d_close(r, test_vec[i].r, 4);
4310 }
4311
4312 return 0;
4313 }
4314
4315 static int
4316 test_simde_mm_loadr_pd(SIMDE_MUNIT_TEST_ARGS) {
4317 const struct {
4318 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 mem_addr[2];
4319 simde__m128d r;
4320 } test_vec[8] = {
4321 { {SIMDE_FLOAT64_C( 808.22), SIMDE_FLOAT64_C( -538.55) },
4322 simde_mm_set_pd(SIMDE_FLOAT64_C( 808.22), SIMDE_FLOAT64_C(-538.55)) },
4323 { {SIMDE_FLOAT64_C( 475.76), SIMDE_FLOAT64_C( 878.69) },
4324 simde_mm_set_pd(SIMDE_FLOAT64_C( 475.76), SIMDE_FLOAT64_C( 878.69)) },
4325 { {SIMDE_FLOAT64_C( -400.00), SIMDE_FLOAT64_C( -135.07) },
4326 simde_mm_set_pd(SIMDE_FLOAT64_C(-400.00), SIMDE_FLOAT64_C(-135.07)) },
4327 { {SIMDE_FLOAT64_C( -32.33), SIMDE_FLOAT64_C( -148.19) },
4328 simde_mm_set_pd(SIMDE_FLOAT64_C( -32.33), SIMDE_FLOAT64_C(-148.19)) },
4329 { {SIMDE_FLOAT64_C( -971.23), SIMDE_FLOAT64_C( -835.90) },
4330 simde_mm_set_pd(SIMDE_FLOAT64_C(-971.23), SIMDE_FLOAT64_C(-835.90)) },
4331 { {SIMDE_FLOAT64_C( -891.74), SIMDE_FLOAT64_C( -424.87) },
4332 simde_mm_set_pd(SIMDE_FLOAT64_C(-891.74), SIMDE_FLOAT64_C(-424.87)) },
4333 { {SIMDE_FLOAT64_C( -199.77), SIMDE_FLOAT64_C( 631.45) },
4334 simde_mm_set_pd(SIMDE_FLOAT64_C(-199.77), SIMDE_FLOAT64_C( 631.45)) },
4335 { {SIMDE_FLOAT64_C( 410.30), SIMDE_FLOAT64_C( 721.68) },
4336 simde_mm_set_pd(SIMDE_FLOAT64_C( 410.30), SIMDE_FLOAT64_C( 721.68)) }
4337 };
4338
4339 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4340 simde__m128d r = simde_mm_loadr_pd(test_vec[i].mem_addr);
4341 simde_assert_m128d_close(r, test_vec[i].r, 1);
4342 }
4343
4344 return 0;
4345 }
4346
4347 static int
4348 test_simde_mm_loadu_pd(SIMDE_MUNIT_TEST_ARGS) {
4349 const struct {
4350 simde_float64 mem_addr[2];
4351 simde__m128d r;
4352 } test_vec[8] = {
4353 { {SIMDE_FLOAT64_C( -578.02), SIMDE_FLOAT64_C( 20.66) },
4354 simde_mm_set_pd(SIMDE_FLOAT64_C( 20.66), SIMDE_FLOAT64_C(-578.02)) },
4355 { {SIMDE_FLOAT64_C( 370.06), SIMDE_FLOAT64_C( -720.89) },
4356 simde_mm_set_pd(SIMDE_FLOAT64_C(-720.89), SIMDE_FLOAT64_C( 370.06)) },
4357 { {SIMDE_FLOAT64_C( 584.38), SIMDE_FLOAT64_C( -849.44) },
4358 simde_mm_set_pd(SIMDE_FLOAT64_C(-849.44), SIMDE_FLOAT64_C( 584.38)) },
4359 { {SIMDE_FLOAT64_C( 636.90), SIMDE_FLOAT64_C( 349.95) },
4360 simde_mm_set_pd(SIMDE_FLOAT64_C( 349.95), SIMDE_FLOAT64_C( 636.90)) },
4361 { {SIMDE_FLOAT64_C( -617.52), SIMDE_FLOAT64_C( 599.47) },
4362 simde_mm_set_pd(SIMDE_FLOAT64_C( 599.47), SIMDE_FLOAT64_C(-617.52)) },
4363 { {SIMDE_FLOAT64_C( 633.70), SIMDE_FLOAT64_C( 30.57) },
4364 simde_mm_set_pd(SIMDE_FLOAT64_C( 30.57), SIMDE_FLOAT64_C( 633.70)) },
4365 { {SIMDE_FLOAT64_C( -333.40), SIMDE_FLOAT64_C( 592.38) },
4366 simde_mm_set_pd(SIMDE_FLOAT64_C( 592.38), SIMDE_FLOAT64_C(-333.40)) },
4367 { {SIMDE_FLOAT64_C( -335.86), SIMDE_FLOAT64_C( 212.26) },
4368 simde_mm_set_pd(SIMDE_FLOAT64_C( 212.26), SIMDE_FLOAT64_C(-335.86)) }
4369 };
4370
4371 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4372 simde__m128d r = simde_mm_loadu_pd(test_vec[i].mem_addr);
4373 simde_assert_m128d_close(r, test_vec[i].r, 1);
4374 }
4375
4376 return 0;
4377 }
4378
4379 static int
4380 test_simde_mm_loadu_si128 (SIMDE_MUNIT_TEST_ARGS) {
4381 #if !defined(__clang__) || !defined(SIMDE_ARCH_ARM)
4382 static const struct {
4383 const int32_t a[4];
4384 } test_vec[] = {
4385 { { -INT32_C( 431648212), INT32_C( 784010008), -INT32_C( 1621020084), INT32_C( 1563595920) } },
4386 { { -INT32_C( 2020743978), INT32_C( 642031476), -INT32_C( 1168838661), -INT32_C( 71485745) } },
4387 { { -INT32_C( 505281848), INT32_C( 1510972686), INT32_C( 1626960080), -INT32_C( 608359675) } },
4388 { { -INT32_C( 2073933297), -INT32_C( 441800983), INT32_C( 1688206997), -INT32_C( 44016587) } },
4389 { { INT32_C( 1843282527), -INT32_C( 1345851937), INT32_C( 1661976670), -INT32_C( 79770388) } },
4390 { { INT32_C( 8364054), -INT32_C( 605738426), INT32_C( 1564443688), INT32_C( 1079746529) } },
4391 { { -INT32_C( 1045612063), -INT32_C( 2056227801), INT32_C( 552108084), INT32_C( 1662789196) } },
4392 { { INT32_C( 207854534), -INT32_C( 286832443), -INT32_C( 1387583796), -INT32_C( 1477597498) } }
4393 };
4394
4395 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
4396 int32_t r[4];
4397 simde_mm_storeu_si128(SIMDE_ALIGN_CAST(simde__m128i*, r), simde_mm_loadu_si128(SIMDE_ALIGN_CAST(const simde__m128i*, test_vec[i].a)));
4398 simde_assert_equal_vi32(sizeof(r) / sizeof(r[0]), r, test_vec[i].a);
4399 }
4400 #endif
4401
4402 return 0;
4403 }
4404
4405 static int
4406 test_simde_mm_loadu_si16 (SIMDE_MUNIT_TEST_ARGS) {
4407 static const struct {
4408 const int16_t a;
4409 const int16_t r[8];
4410 } test_vec[] = {
4411 { -INT16_C( 11138),
4412 { -INT16_C( 11138), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
4413 { INT16_C( 23724),
4414 { INT16_C( 23724), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
4415 { INT16_C( 14484),
4416 { INT16_C( 14484), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
4417 { INT16_C( 13428),
4418 { INT16_C( 13428), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
4419 { -INT16_C( 4679),
4420 { -INT16_C( 4679), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
4421 { -INT16_C( 27444),
4422 { -INT16_C( 27444), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
4423 { INT16_C( 23920),
4424 { INT16_C( 23920), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } },
4425 { INT16_C( 10692),
4426 { INT16_C( 10692), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) } }
4427 };
4428
4429 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
4430 int16_t a = test_vec[i].a;
4431 simde__m128i r = HEDLEY_CONCAT(simde,_mm_loadu_si16)(&a);
4432 simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r));
4433 }
4434
4435 return 0;
4436 }
4437
4438 static int
4439 test_simde_mm_loadu_si32 (SIMDE_MUNIT_TEST_ARGS) {
4440 static const struct {
4441 const int32_t a;
4442 const int32_t r[4];
4443 } test_vec[] = {
4444 { INT32_C( 418822831),
4445 { INT32_C( 418822831), INT32_C( 0), INT32_C( 0), INT32_C( 0) } },
4446 { INT32_C( 1942173819),
4447 { INT32_C( 1942173819), INT32_C( 0), INT32_C( 0), INT32_C( 0) } },
4448 { INT32_C( 1655488478),
4449 { INT32_C( 1655488478), INT32_C( 0), INT32_C( 0), INT32_C( 0) } },
4450 { -INT32_C( 1203443910),
4451 { -INT32_C( 1203443910), INT32_C( 0), INT32_C( 0), INT32_C( 0) } },
4452 { INT32_C( 1326772667),
4453 { INT32_C( 1326772667), INT32_C( 0), INT32_C( 0), INT32_C( 0) } },
4454 { -INT32_C( 494630871),
4455 { -INT32_C( 494630871), INT32_C( 0), INT32_C( 0), INT32_C( 0) } },
4456 { -INT32_C( 445230987),
4457 { -INT32_C( 445230987), INT32_C( 0), INT32_C( 0), INT32_C( 0) } },
4458 { INT32_C( 1544436653),
4459 { INT32_C( 1544436653), INT32_C( 0), INT32_C( 0), INT32_C( 0) } }
4460 };
4461
4462 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
4463 int32_t a = test_vec[i].a;
4464 simde__m128i r = HEDLEY_CONCAT(simde,_mm_loadu_si32)(&a);
4465 simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
4466 }
4467
4468 return 0;
4469 }
4470
4471 static int
4472 test_simde_mm_loadu_si64 (SIMDE_MUNIT_TEST_ARGS) {
4473 static const struct {
4474 const int64_t a;
4475 const int64_t r[2];
4476 } test_vec[] = {
4477 { INT64_C( 1937454096935355637),
4478 { INT64_C( 1937454096935355637), INT64_C( 0) } },
4479 { INT64_C( 3668957564122271735),
4480 { INT64_C( 3668957564122271735), INT64_C( 0) } },
4481 { -INT64_C( 235024424980250958),
4482 { -INT64_C( 235024424980250958), INT64_C( 0) } },
4483 { INT64_C( 7233045361154208854),
4484 { INT64_C( 7233045361154208854), INT64_C( 0) } },
4485 { INT64_C( 309823741680211445),
4486 { INT64_C( 309823741680211445), INT64_C( 0) } },
4487 { INT64_C( 4463101911464528198),
4488 { INT64_C( 4463101911464528198), INT64_C( 0) } },
4489 { -INT64_C( 3557326416991718882),
4490 { -INT64_C( 3557326416991718882), INT64_C( 0) } },
4491 { INT64_C( 7628307720165229322),
4492 { INT64_C( 7628307720165229322), INT64_C( 0) } }
4493 };
4494
4495 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
4496 int64_t a = test_vec[i].a;
4497 simde__m128i r = HEDLEY_CONCAT(simde,_mm_loadu_si64)(&a);
4498 simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r));
4499 }
4500
4501 return 0;
4502 }
4503
4504 static int
4505 test_simde_mm_madd_epi16(SIMDE_MUNIT_TEST_ARGS) {
4506 const struct {
4507 simde__m128i a;
4508 simde__m128i b;
4509 simde__m128i r;
4510 } test_vec[8] = {
4511 { simde_mm_set_epi16(INT16_C( 24289), INT16_C( 22642), INT16_C( 24338), INT16_C( 21466),
4512 INT16_C(-21399), INT16_C(-25114), INT16_C( -38), INT16_C( 24157)),
4513 simde_mm_set_epi16(INT16_C( -9939), INT16_C(-13077), INT16_C(-13691), INT16_C(-22496),
4514 INT16_C( -7750), INT16_C( 2620), INT16_C(-25114), INT16_C( 6535)),
4515 simde_mm_set_epi32( -537497805, -816110694, 100043570, 158820327) },
4516 { simde_mm_set_epi16(INT16_C( 13645), INT16_C( 20663), INT16_C( 21053), INT16_C( 5963),
4517 INT16_C( -9189), INT16_C( -1395), INT16_C( 25221), INT16_C( 27070)),
4518 simde_mm_set_epi16(INT16_C(-28936), INT16_C(-24038), INT16_C( 6837), INT16_C( 18195),
4519 INT16_C(-15316), INT16_C(-11637), INT16_C( 5200), INT16_C( 7885)),
4520 simde_mm_set_epi32( -891528914, 252436146, 156972339, 344596150) },
4521 { simde_mm_set_epi16(INT16_C(-31367), INT16_C(-13886), INT16_C( 25125), INT16_C(-12503),
4522 INT16_C( 15451), INT16_C( -6370), INT16_C(-24113), INT16_C( 2653)),
4523 simde_mm_set_epi16(INT16_C( 5595), INT16_C(-23387), INT16_C(-11854), INT16_C( 264),
4524 INT16_C( 15071), INT16_C( 3868), INT16_C(-30127), INT16_C( 2383)),
4525 simde_mm_set_epi32( 149253517, -301132542, 208222861, 732774450) },
4526 { simde_mm_set_epi16(INT16_C( 6250), INT16_C( 794), INT16_C( -7973), INT16_C( 27046),
4527 INT16_C( 13164), INT16_C( 16469), INT16_C( 1989), INT16_C(-24542)),
4528 simde_mm_set_epi16(INT16_C( 22452), INT16_C( -4402), INT16_C(-26550), INT16_C( 31252),
4529 INT16_C(-29251), INT16_C( -1650), INT16_C(-13867), INT16_C( 26387)),
4530 simde_mm_set_epi32( 136829812, 1056924742, -412234014, -675171217) },
4531 { simde_mm_set_epi16(INT16_C(-17693), INT16_C(-18978), INT16_C( 22797), INT16_C( 31393),
4532 INT16_C( 32262), INT16_C(-21009), INT16_C( 9435), INT16_C( 20059)),
4533 simde_mm_set_epi16(INT16_C( 20064), INT16_C( 4406), INT16_C( 1105), INT16_C(-32185),
4534 INT16_C( 26331), INT16_C( -8672), INT16_C( 15113), INT16_C(-24381)),
4535 simde_mm_set_epi32( -438609420, -985193020, 1031680770, -346467324) },
4536 { simde_mm_set_epi16(INT16_C( -9214), INT16_C(-31455), INT16_C(-14871), INT16_C( -8603),
4537 INT16_C( 17039), INT16_C(-27694), INT16_C( 18091), INT16_C( 27811)),
4538 simde_mm_set_epi16(INT16_C( 9903), INT16_C( 7626), INT16_C( -7009), INT16_C(-11696),
4539 INT16_C(-31989), INT16_C( 28434), INT16_C(-24743), INT16_C(-27058)),
4540 simde_mm_set_epi32( -331122072, 204851527, -1332511767, -1200135651) },
4541 { simde_mm_set_epi16(INT16_C( 20741), INT16_C( 5382), INT16_C(-29692), INT16_C( 12589),
4542 INT16_C( 21204), INT16_C( 3076), INT16_C(-24365), INT16_C( -1783)),
4543 simde_mm_set_epi16(INT16_C(-15203), INT16_C(-26894), INT16_C( -6878), INT16_C(-23472),
4544 INT16_C(-18994), INT16_C( 11044), INT16_C( 15739), INT16_C( -241)),
4545 simde_mm_set_epi32( -460068931, -91267432, -368777432, -383051032) },
4546 { simde_mm_set_epi16(INT16_C( 24682), INT16_C( 17647), INT16_C(-19806), INT16_C(-13656),
4547 INT16_C( 26394), INT16_C( 4814), INT16_C( -4589), INT16_C( 17983)),
4548 simde_mm_set_epi16(INT16_C(-32304), INT16_C(-30224), INT16_C(-20430), INT16_C(-28018),
4549 INT16_C( 29012), INT16_C( 7494), INT16_C( -7871), INT16_C( 16228)),
4550 simde_mm_set_epi32(-1330690256, 787250388, 801818844, 327948143) }
4551 };
4552
4553 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4554 simde__m128i r = simde_mm_madd_epi16(test_vec[i].a, test_vec[i].b);
4555 simde_assert_m128i_i32(r, ==, test_vec[i].r);
4556 }
4557
4558 return 0;
4559 }
4560
4561 static int
4562 test_simde_mm_maskmoveu_si128(SIMDE_MUNIT_TEST_ARGS) {
4563 const struct {
4564 simde__m128i a;
4565 simde__m128i mask;
4566 int8_t i[16];
4567 int8_t r[16];
4568 } test_vec[8] = {
4569 { simde_mm_set_epi8(INT8_C(-127), INT8_C( 121), INT8_C( 33), INT8_C( 92), INT8_C( 95), INT8_C( 30), INT8_C( 117), INT8_C( 103),
4570 INT8_C( -74), INT8_C( -18), INT8_C( 28), INT8_C( 15), INT8_C(-111), INT8_C( -65), INT8_C( 117), INT8_C( -8)),
4571 simde_mm_set_epi8(INT8_C( 42), INT8_C( 73), INT8_C( 1), INT8_C( -97), INT8_C( -45), INT8_C( 24), INT8_C( 88), INT8_C( -76),
4572 INT8_C(-109), INT8_C( 78), INT8_C( -80), INT8_C( -97), INT8_C( 85), INT8_C( 102), INT8_C( 36), INT8_C( -19)),
4573 { 0, -116, -92, 122, -68, 23, -43, 86,
4574 -50, -28, -43, -3, 19, -114, 122, 62 },
4575 { -8, -116, -92, 122, 15, 28, -43, -74,
4576 103, -28, -43, 95, 92, -114, 122, 62 } },
4577 { simde_mm_set_epi8(INT8_C( 70), INT8_C( 102), INT8_C( -59), INT8_C( 6), INT8_C( -83), INT8_C( 40), INT8_C( -32), INT8_C( 43),
4578 INT8_C( 90), INT8_C(-118), INT8_C( 82), INT8_C( 24), INT8_C(-106), INT8_C( -61), INT8_C( -19), INT8_C(-101)),
4579 simde_mm_set_epi8(INT8_C( 17), INT8_C( -58), INT8_C( -68), INT8_C( 86), INT8_C( 20), INT8_C( 40), INT8_C( 60), INT8_C( 30),
4580 INT8_C( 31), INT8_C( 30), INT8_C( 18), INT8_C( 67), INT8_C( -9), INT8_C( 103), INT8_C( 21), INT8_C( -50)),
4581 { -125, 126, -10, -60, 19, 92, -8, -124,
4582 81, -122, 122, 13, -58, 49, -122, 24 },
4583 { -101, 126, -10, -106, 19, 92, -8, -124,
4584 81, -122, 122, 13, -58, -59, 102, 24 } },
4585 { simde_mm_set_epi8(INT8_C( -26), INT8_C(-127), INT8_C( 58), INT8_C( -79), INT8_C( -88), INT8_C(-105), INT8_C( -66), INT8_C( 41),
4586 INT8_C( 75), INT8_C( -34), INT8_C( 97), INT8_C( -55), INT8_C( -65), INT8_C( -30), INT8_C( 23), INT8_C( 28)),
4587 simde_mm_set_epi8(INT8_C(-107), INT8_C( 69), INT8_C( -28), INT8_C( -40), INT8_C( 105), INT8_C( 0), INT8_C( 114), INT8_C( 113),
4588 INT8_C( -65), INT8_C( -82), INT8_C( 87), INT8_C( -14), INT8_C( -36), INT8_C( 68), INT8_C(-120), INT8_C( 38)),
4589 { -114, 55, -16, 51, 110, -44, 59, -6,
4590 43, -95, -82, 119, -56, 9, -47, -20 },
4591 { -114, 23, -16, -65, -55, -44, -34, 75,
4592 43, -95, -82, 119, -79, 58, -47, -26 } },
4593 { simde_mm_set_epi8(INT8_C( 87), INT8_C( 99), INT8_C( 22), INT8_C( 78), INT8_C( 93), INT8_C( -44), INT8_C( -98), INT8_C( 62),
4594 INT8_C( -97), INT8_C( -50), INT8_C( -31), INT8_C(-109), INT8_C( 10), INT8_C( -86), INT8_C( -15), INT8_C( 7)),
4595 simde_mm_set_epi8(INT8_C( 96), INT8_C( 76), INT8_C( 14), INT8_C(-114), INT8_C( 84), INT8_C( -85), INT8_C( 61), INT8_C( -38),
4596 INT8_C( -45), INT8_C( -83), INT8_C( -13), INT8_C( -50), INT8_C( -35), INT8_C(-111), INT8_C(-107), INT8_C( -50)),
4597 { -8, 64, -68, 23, -101, 35, 126, 119,
4598 -47, 4, 79, 23, 113, 117, -76, 93 },
4599 { 7, -15, -86, 10, -109, -31, -50, -97,
4600 62, 4, -44, 23, 78, 117, -76, 93 } },
4601 { simde_mm_set_epi8(INT8_C( -38), INT8_C(-119), INT8_C( -46), INT8_C( -13), INT8_C( -3), INT8_C(-122), INT8_C( 75), INT8_C( 123),
4602 INT8_C( 26), INT8_C( -71), INT8_C( -42), INT8_C(-124), INT8_C( 49), INT8_C( 1), INT8_C( 5), INT8_C( -90)),
4603 simde_mm_set_epi8(INT8_C( 116), INT8_C( -8), INT8_C( 33), INT8_C( 63), INT8_C(-110), INT8_C( -94), INT8_C( -34), INT8_C( 66),
4604 INT8_C( 51), INT8_C(-117), INT8_C( 28), INT8_C( -95), INT8_C( -52), INT8_C( 122), INT8_C( 118), INT8_C( -99)),
4605 { -45, 85, 12, 62, -89, -105, 90, -19,
4606 48, 18, -59, -98, -30, -113, 13, 91 },
4607 { -90, 85, 12, 49, -124, -105, -71, -19,
4608 48, 75, -122, -3, -30, -113, -119, 91 } },
4609 { simde_mm_set_epi8(INT8_C( 52), INT8_C( 36), INT8_C( 112), INT8_C( 70), INT8_C( 110), INT8_C( 75), INT8_C( -6), INT8_C(-101),
4610 INT8_C( 3), INT8_C( 113), INT8_C( -32), INT8_C( 119), INT8_C( -19), INT8_C( 117), INT8_C( 31), INT8_C( 119)),
4611 simde_mm_set_epi8(INT8_C( 79), INT8_C( 39), INT8_C( 6), INT8_C( 30), INT8_C( 120), INT8_C( -75), INT8_C( 26), INT8_C( 57),
4612 INT8_C( 123), INT8_C( 70), INT8_C( 40), INT8_C( 84), INT8_C( 111), INT8_C( -59), INT8_C( -79), INT8_C( -87)),
4613 { -74, -2, -124, 73, 44, 83, 18, -48,
4614 -78, 27, 30, -26, -3, 56, 89, 125 },
4615 { 119, 31, 117, 73, 44, 83, 18, -48,
4616 -78, 27, 75, -26, -3, 56, 89, 125 } },
4617 { simde_mm_set_epi8(INT8_C( -83), INT8_C( -42), INT8_C(-124), INT8_C( -81), INT8_C( -65), INT8_C( 46), INT8_C( -62), INT8_C( 102),
4618 INT8_C( 50), INT8_C( 6), INT8_C( -6), INT8_C( -21), INT8_C( -51), INT8_C(-114), INT8_C(-126), INT8_C( 74)),
4619 simde_mm_set_epi8(INT8_C( 127), INT8_C(-119), INT8_C( -85), INT8_C( 95), INT8_C( 70), INT8_C( -40), INT8_C( 17), INT8_C( 124),
4620 INT8_C( -61), INT8_C(-126), INT8_C( 117), INT8_C( -42), INT8_C( 62), INT8_C(-111), INT8_C(-103), INT8_C( 57)),
4621 { 108, 40, -118, -12, 90, -12, -24, 50,
4622 104, 87, -20, -86, -53, 25, -94, -101 },
4623 { 108, -126, -114, -12, -21, -12, 6, 50,
4624 104, 87, 46, -86, -53, -124, -42, -101 } },
4625 { simde_mm_set_epi8(INT8_C( -23), INT8_C( -92), INT8_C( 93), INT8_C( -78), INT8_C( -39), INT8_C( -72), INT8_C( -43), INT8_C( 1),
4626 INT8_C(-121), INT8_C( 103), INT8_C( 61), INT8_C( 82), INT8_C( 45), INT8_C(-120), INT8_C( -86), INT8_C( 51)),
4627 simde_mm_set_epi8(INT8_C( -82), INT8_C( 92), INT8_C( -46), INT8_C( -97), INT8_C( 59), INT8_C( -16), INT8_C( 95), INT8_C( -85),
4628 INT8_C( 38), INT8_C( 36), INT8_C(-125), INT8_C( 74), INT8_C( 14), INT8_C( 75), INT8_C( -74), INT8_C( -25)),
4629 { -53, 48, 63, -44, -103, 12, 49, -1,
4630 -58, 70, -18, -117, 101, -90, 121, -31 },
4631 { 51, -86, 63, -44, -103, 61, 49, -1,
4632 1, 70, -72, -117, -78, 93, 121, -23 } }
4633 };
4634
4635 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4636 int8_t r[16];
4637 simde_memcpy(r, test_vec[i].i, 16);
4638
4639 #if defined SIMDE_X86_SSE2_NATIVE && defined SIMDE_NATIVE_ALIASES_TESTING
4640 simde_mm_maskmoveu_si128(test_vec[i].a, test_vec[i].mask, HEDLEY_REINTERPRET_CAST(char *, r));
4641 #else
4642 simde_mm_maskmoveu_si128(test_vec[i].a, test_vec[i].mask, r);
4643 #endif
4644
4645 simde_assert_equal_vi8(sizeof(r) / sizeof(r[0]), r, test_vec[i].r);
4646 }
4647
4648 return 0;
4649 }
4650
4651 static int
4652 test_simde_mm_min_epu8(SIMDE_MUNIT_TEST_ARGS) {
4653 const struct {
4654 simde__m128i a;
4655 simde__m128i b;
4656 simde__m128i r;
4657 } test_vec[8] = {
4658 { simde_x_mm_set_epu8(164, 110, 116, 95, 43, 222, 122, 21,
4659 12, 65, 208, 248, 108, 166, 116, 17),
4660 simde_x_mm_set_epu8( 56, 62, 214, 11, 248, 124, 69, 2,
4661 198, 169, 227, 124, 192, 250, 140, 32),
4662 simde_x_mm_set_epu8( 56, 62, 116, 11, 43, 124, 69, 2,
4663 12, 65, 208, 124, 108, 166, 116, 17) },
4664 { simde_x_mm_set_epu8(136, 183, 233, 96, 11, 36, 213, 146,
4665 238, 174, 59, 229, 56, 186, 203, 28),
4666 simde_x_mm_set_epu8( 82, 12, 9, 200, 196, 116, 127, 247,
4667 1, 69, 178, 128, 65, 11, 179, 176),
4668 simde_x_mm_set_epu8( 82, 12, 9, 96, 11, 36, 127, 146,
4669 1, 69, 59, 128, 56, 11, 179, 28) },
4670 { simde_x_mm_set_epu8(164, 64, 195, 253, 35, 95, 119, 110,
4671 106, 102, 21, 173, 49, 40, 101, 151),
4672 simde_x_mm_set_epu8(109, 25, 32, 203, 64, 71, 33, 98,
4673 48, 43, 195, 67, 254, 158, 167, 217),
4674 simde_x_mm_set_epu8(109, 25, 32, 203, 35, 71, 33, 98,
4675 48, 43, 21, 67, 49, 40, 101, 151) },
4676 { simde_x_mm_set_epu8(233, 255, 136, 159, 118, 246, 37, 8,
4677 195, 35, 70, 7, 91, 37, 20, 112),
4678 simde_x_mm_set_epu8( 4, 91, 243, 163, 160, 26, 137, 208,
4679 146, 195, 124, 148, 53, 99, 21, 240),
4680 simde_x_mm_set_epu8( 4, 91, 136, 159, 118, 26, 37, 8,
4681 146, 35, 70, 7, 53, 37, 20, 112) },
4682 { simde_x_mm_set_epu8(196, 4, 110, 234, 88, 121, 133, 146,
4683 127, 167, 173, 105, 205, 0, 197, 107),
4684 simde_x_mm_set_epu8(240, 105, 248, 55, 202, 217, 219, 230,
4685 183, 240, 91, 164, 168, 6, 75, 186),
4686 simde_x_mm_set_epu8(196, 4, 110, 55, 88, 121, 133, 146,
4687 127, 167, 91, 105, 168, 0, 75, 107) },
4688 { simde_x_mm_set_epu8(191, 108, 145, 178, 194, 118, 187, 175,
4689 80, 196, 99, 239, 6, 206, 186, 130),
4690 simde_x_mm_set_epu8(109, 182, 208, 91, 232, 171, 41, 238,
4691 121, 144, 203, 42, 182, 89, 69, 166),
4692 simde_x_mm_set_epu8(109, 108, 145, 91, 194, 118, 41, 175,
4693 80, 144, 99, 42, 6, 89, 69, 130) },
4694 { simde_x_mm_set_epu8( 5, 152, 184, 251, 233, 22, 184, 152,
4695 12, 126, 120, 80, 191, 98, 37, 36),
4696 simde_x_mm_set_epu8(194, 116, 229, 250, 247, 241, 153, 192,
4697 20, 172, 224, 148, 240, 246, 120, 0),
4698 simde_x_mm_set_epu8( 5, 116, 184, 250, 233, 22, 153, 152,
4699 12, 126, 120, 80, 191, 98, 37, 0) },
4700 { simde_x_mm_set_epu8(188, 102, 137, 134, 213, 1, 140, 166,
4701 143, 171, 248, 89, 128, 81, 226, 136),
4702 simde_x_mm_set_epu8(246, 210, 83, 107, 44, 255, 100, 235,
4703 99, 233, 199, 197, 1, 159, 61, 31),
4704 simde_x_mm_set_epu8(188, 102, 83, 107, 44, 1, 100, 166,
4705 99, 171, 199, 89, 1, 81, 61, 31) }
4706 };
4707
4708 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4709 simde__m128i r = simde_mm_min_epu8(test_vec[i].a, test_vec[i].b);
4710 simde_assert_m128i_u8(r, ==, test_vec[i].r);
4711 }
4712
4713 return 0;
4714 }
4715
4716 static int
4717 test_simde_mm_min_epi16(SIMDE_MUNIT_TEST_ARGS) {
4718 const struct {
4719 simde__m128i a;
4720 simde__m128i b;
4721 simde__m128i r;
4722 } test_vec[8] = {
4723 { simde_mm_set_epi16(INT16_C(-23442), INT16_C( 29791), INT16_C( 11230), INT16_C( 31253),
4724 INT16_C( 3137), INT16_C(-12040), INT16_C( 27814), INT16_C( 29713)),
4725 simde_mm_set_epi16(INT16_C( 14398), INT16_C(-10741), INT16_C( -1924), INT16_C( 17666),
4726 INT16_C(-14679), INT16_C( -7300), INT16_C(-16134), INT16_C(-29664)),
4727 simde_mm_set_epi16(INT16_C(-23442), INT16_C(-10741), INT16_C( -1924), INT16_C( 17666),
4728 INT16_C(-14679), INT16_C(-12040), INT16_C(-16134), INT16_C(-29664)) },
4729 { simde_mm_set_epi16(INT16_C(-30537), INT16_C( -5792), INT16_C( 2852), INT16_C(-10862),
4730 INT16_C( -4434), INT16_C( 15333), INT16_C( 14522), INT16_C(-13540)),
4731 simde_mm_set_epi16(INT16_C( 21004), INT16_C( 2504), INT16_C(-15244), INT16_C( 32759),
4732 INT16_C( 325), INT16_C(-19840), INT16_C( 16651), INT16_C(-19536)),
4733 simde_mm_set_epi16(INT16_C(-30537), INT16_C( -5792), INT16_C(-15244), INT16_C(-10862),
4734 INT16_C( -4434), INT16_C(-19840), INT16_C( 14522), INT16_C(-19536)) },
4735 { simde_mm_set_epi16(INT16_C(-23488), INT16_C(-15363), INT16_C( 9055), INT16_C( 30574),
4736 INT16_C( 27238), INT16_C( 5549), INT16_C( 12584), INT16_C( 26007)),
4737 simde_mm_set_epi16(INT16_C( 27929), INT16_C( 8395), INT16_C( 16455), INT16_C( 8546),
4738 INT16_C( 12331), INT16_C(-15549), INT16_C( -354), INT16_C(-22567)),
4739 simde_mm_set_epi16(INT16_C(-23488), INT16_C(-15363), INT16_C( 9055), INT16_C( 8546),
4740 INT16_C( 12331), INT16_C(-15549), INT16_C( -354), INT16_C(-22567)) },
4741 { simde_mm_set_epi16(INT16_C( -5633), INT16_C(-30561), INT16_C( 30454), INT16_C( 9480),
4742 INT16_C(-15581), INT16_C( 17927), INT16_C( 23333), INT16_C( 5232)),
4743 simde_mm_set_epi16(INT16_C( 1115), INT16_C( -3165), INT16_C(-24550), INT16_C(-30256),
4744 INT16_C(-27965), INT16_C( 31892), INT16_C( 13667), INT16_C( 5616)),
4745 simde_mm_set_epi16(INT16_C( -5633), INT16_C(-30561), INT16_C(-24550), INT16_C(-30256),
4746 INT16_C(-27965), INT16_C( 17927), INT16_C( 13667), INT16_C( 5232)) },
4747 { simde_mm_set_epi16(INT16_C(-15356), INT16_C( 28394), INT16_C( 22649), INT16_C(-31342),
4748 INT16_C( 32679), INT16_C(-21143), INT16_C(-13056), INT16_C(-14997)),
4749 simde_mm_set_epi16(INT16_C( -3991), INT16_C( -1993), INT16_C(-13607), INT16_C( -9242),
4750 INT16_C(-18448), INT16_C( 23460), INT16_C(-22522), INT16_C( 19386)),
4751 simde_mm_set_epi16(INT16_C(-15356), INT16_C( -1993), INT16_C(-13607), INT16_C(-31342),
4752 INT16_C(-18448), INT16_C(-21143), INT16_C(-22522), INT16_C(-14997)) },
4753 { simde_mm_set_epi16(INT16_C(-16532), INT16_C(-28238), INT16_C(-15754), INT16_C(-17489),
4754 INT16_C( 20676), INT16_C( 25583), INT16_C( 1742), INT16_C(-17790)),
4755 simde_mm_set_epi16(INT16_C( 28086), INT16_C(-12197), INT16_C( -5973), INT16_C( 10734),
4756 INT16_C( 31120), INT16_C(-13526), INT16_C(-18855), INT16_C( 17830)),
4757 simde_mm_set_epi16(INT16_C(-16532), INT16_C(-28238), INT16_C(-15754), INT16_C(-17489),
4758 INT16_C( 20676), INT16_C(-13526), INT16_C(-18855), INT16_C(-17790)) },
4759 { simde_mm_set_epi16(INT16_C( 1432), INT16_C(-18181), INT16_C( -5866), INT16_C(-18280),
4760 INT16_C( 3198), INT16_C( 30800), INT16_C(-16542), INT16_C( 9508)),
4761 simde_mm_set_epi16(INT16_C(-15756), INT16_C( -6662), INT16_C( -2063), INT16_C(-26176),
4762 INT16_C( 5292), INT16_C( -8044), INT16_C( -3850), INT16_C( 30720)),
4763 simde_mm_set_epi16(INT16_C(-15756), INT16_C(-18181), INT16_C( -5866), INT16_C(-26176),
4764 INT16_C( 3198), INT16_C( -8044), INT16_C(-16542), INT16_C( 9508)) },
4765 { simde_mm_set_epi16(INT16_C(-17306), INT16_C(-30330), INT16_C(-11007), INT16_C(-29530),
4766 INT16_C(-28757), INT16_C( -1959), INT16_C(-32687), INT16_C( -7544)),
4767 simde_mm_set_epi16(INT16_C( -2350), INT16_C( 21355), INT16_C( 11519), INT16_C( 25835),
4768 INT16_C( 25577), INT16_C(-14395), INT16_C( 415), INT16_C( 15647)),
4769 simde_mm_set_epi16(INT16_C(-17306), INT16_C(-30330), INT16_C(-11007), INT16_C(-29530),
4770 INT16_C(-28757), INT16_C(-14395), INT16_C(-32687), INT16_C( -7544)) }
4771 };
4772
4773 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4774 simde__m128i r = simde_mm_min_epi16(test_vec[i].a, test_vec[i].b);
4775 simde_assert_m128i_u16(r, ==, test_vec[i].r);
4776 }
4777
4778 return 0;
4779 }
4780
4781 static int
4782 test_simde_mm_min_pd(SIMDE_MUNIT_TEST_ARGS) {
4783 const struct {
4784 simde__m128d a;
4785 simde__m128d b;
4786 simde__m128d r;
4787 } test_vec[10] = {
4788 { simde_mm_set_pd(SIMDE_FLOAT64_C( -927.67), SIMDE_FLOAT64_C( -514.32)),
4789 simde_mm_set_pd(SIMDE_FLOAT64_C( 342.71), SIMDE_FLOAT64_C( 927.58)),
4790 simde_mm_set_pd(SIMDE_FLOAT64_C( -927.67), SIMDE_FLOAT64_C( -514.32)) },
4791 { simde_mm_set_pd(SIMDE_FLOAT64_C( -704.64), SIMDE_FLOAT64_C( 925.40)),
4792 simde_mm_set_pd(SIMDE_FLOAT64_C( -589.60), SIMDE_FLOAT64_C( -498.63)),
4793 simde_mm_set_pd(SIMDE_FLOAT64_C( -704.64), SIMDE_FLOAT64_C( -498.63)) },
4794 { simde_mm_set_pd(SIMDE_FLOAT64_C( -244.39), SIMDE_FLOAT64_C( 572.76)),
4795 simde_mm_set_pd(SIMDE_FLOAT64_C( -10.04), SIMDE_FLOAT64_C( 293.99)),
4796 simde_mm_set_pd(SIMDE_FLOAT64_C( -244.39), SIMDE_FLOAT64_C( 293.99)) },
4797 { simde_mm_set_pd(SIMDE_FLOAT64_C( 321.70), SIMDE_FLOAT64_C( -283.39)),
4798 simde_mm_set_pd(SIMDE_FLOAT64_C( 60.35), SIMDE_FLOAT64_C( -248.75)),
4799 simde_mm_set_pd(SIMDE_FLOAT64_C( 60.35), SIMDE_FLOAT64_C( -283.39)) },
4800 { simde_mm_set_pd(SIMDE_FLOAT64_C( -67.84), SIMDE_FLOAT64_C( 763.91)),
4801 simde_mm_set_pd(SIMDE_FLOAT64_C( 150.47), SIMDE_FLOAT64_C( -773.85)),
4802 simde_mm_set_pd(SIMDE_FLOAT64_C( -67.84), SIMDE_FLOAT64_C( -773.85)) },
4803 { simde_mm_set_pd(SIMDE_FLOAT64_C( -42.86), SIMDE_FLOAT64_C( 169.28)),
4804 simde_mm_set_pd(SIMDE_FLOAT64_C( -820.89), SIMDE_FLOAT64_C( 325.20)),
4805 simde_mm_set_pd(SIMDE_FLOAT64_C( -820.89), SIMDE_FLOAT64_C( 169.28)) },
4806 { simde_mm_set_pd(SIMDE_FLOAT64_C( -976.90), SIMDE_FLOAT64_C( 883.11)),
4807 simde_mm_set_pd(SIMDE_FLOAT64_C( -450.39), SIMDE_FLOAT64_C( -249.21)),
4808 simde_mm_set_pd(SIMDE_FLOAT64_C( -976.90), SIMDE_FLOAT64_C( -249.21)) },
4809 { simde_mm_set_pd(SIMDE_FLOAT64_C( 495.76), SIMDE_FLOAT64_C( 415.93)),
4810 simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C( -567.72)),
4811 simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C( -567.72)) },
4812 { simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -567.72)),
4813 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -567.72)),
4814 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -567.72)) },
4815 { simde_mm_set_pd(SIMDE_FLOAT64_C( 495.76), SIMDE_FLOAT64_C( 0.00)),
4816 simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C( 0.00)),
4817 simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C( 0.00)) },
4818 };
4819
4820 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4821 simde__m128d r = simde_mm_min_pd(test_vec[i].a, test_vec[i].b);
4822 simde_assert_m128d_close(r, test_vec[i].r, 1);
4823 }
4824
4825 return 0;
4826 }
4827
4828 static int
4829 test_simde_mm_min_sd(SIMDE_MUNIT_TEST_ARGS) {
4830 const struct {
4831 simde__m128d a;
4832 simde__m128d b;
4833 simde__m128d r;
4834 } test_vec[8] = {
4835 { simde_mm_set_pd(SIMDE_FLOAT64_C( -927.67), SIMDE_FLOAT64_C( -514.32)),
4836 simde_mm_set_pd(SIMDE_FLOAT64_C( 342.71), SIMDE_FLOAT64_C( 927.58)),
4837 simde_mm_set_pd(SIMDE_FLOAT64_C( -927.67), SIMDE_FLOAT64_C( -514.32)) },
4838 { simde_mm_set_pd(SIMDE_FLOAT64_C( -704.64), SIMDE_FLOAT64_C( 925.40)),
4839 simde_mm_set_pd(SIMDE_FLOAT64_C( -589.60), SIMDE_FLOAT64_C( -498.63)),
4840 simde_mm_set_pd(SIMDE_FLOAT64_C( -704.64), SIMDE_FLOAT64_C( -498.63)) },
4841 { simde_mm_set_pd(SIMDE_FLOAT64_C( -244.39), SIMDE_FLOAT64_C( 572.76)),
4842 simde_mm_set_pd(SIMDE_FLOAT64_C( -10.04), SIMDE_FLOAT64_C( 293.99)),
4843 simde_mm_set_pd(SIMDE_FLOAT64_C( -244.39), SIMDE_FLOAT64_C( 293.99)) },
4844 { simde_mm_set_pd(SIMDE_FLOAT64_C( 321.70), SIMDE_FLOAT64_C( -283.39)),
4845 simde_mm_set_pd(SIMDE_FLOAT64_C( 60.35), SIMDE_FLOAT64_C( -248.75)),
4846 simde_mm_set_pd(SIMDE_FLOAT64_C( 321.70), SIMDE_FLOAT64_C( -283.39)) },
4847 { simde_mm_set_pd(SIMDE_FLOAT64_C( -67.84), SIMDE_FLOAT64_C( 763.91)),
4848 simde_mm_set_pd(SIMDE_FLOAT64_C( 150.47), SIMDE_FLOAT64_C( -773.85)),
4849 simde_mm_set_pd(SIMDE_FLOAT64_C( -67.84), SIMDE_FLOAT64_C( -773.85)) },
4850 { simde_mm_set_pd(SIMDE_FLOAT64_C( -42.86), SIMDE_FLOAT64_C( 169.28)),
4851 simde_mm_set_pd(SIMDE_FLOAT64_C( -820.89), SIMDE_FLOAT64_C( 325.20)),
4852 simde_mm_set_pd(SIMDE_FLOAT64_C( -42.86), SIMDE_FLOAT64_C( 169.28)) },
4853 { simde_mm_set_pd(SIMDE_FLOAT64_C( -976.90), SIMDE_FLOAT64_C( 883.11)),
4854 simde_mm_set_pd(SIMDE_FLOAT64_C( -450.39), SIMDE_FLOAT64_C( -249.21)),
4855 simde_mm_set_pd(SIMDE_FLOAT64_C( -976.90), SIMDE_FLOAT64_C( -249.21)) },
4856 { simde_mm_set_pd(SIMDE_FLOAT64_C( 495.76), SIMDE_FLOAT64_C( 415.93)),
4857 simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C( -567.72)),
4858 simde_mm_set_pd(SIMDE_FLOAT64_C( 495.76), SIMDE_FLOAT64_C( -567.72)) }
4859 };
4860
4861 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4862 simde__m128d r = simde_mm_min_sd(test_vec[i].a, test_vec[i].b);
4863 simde_assert_m128d_close(r, test_vec[i].r, 1);
4864 }
4865
4866 return 0;
4867 }
4868
4869 static int
4870 test_simde_mm_max_epu8(SIMDE_MUNIT_TEST_ARGS) {
4871 const struct {
4872 simde__m128i a;
4873 simde__m128i b;
4874 simde__m128i r;
4875 } test_vec[8] = {
4876 { simde_x_mm_set_epu8(168, 216, 116, 83, 89, 36, 70, 43,
4877 213, 80, 178, 134, 66, 113, 243, 129),
4878 simde_x_mm_set_epu8(121, 26, 77, 212, 58, 167, 83, 180,
4879 236, 50, 65, 112, 248, 228, 87, 231),
4880 simde_x_mm_set_epu8(168, 216, 116, 212, 89, 167, 83, 180,
4881 236, 80, 178, 134, 248, 228, 243, 231) },
4882 { simde_x_mm_set_epu8(157, 143, 244, 146, 49, 140, 145, 221,
4883 80, 79, 154, 71, 49, 213, 105, 22),
4884 simde_x_mm_set_epu8( 81, 58, 115, 104, 26, 180, 63, 33,
4885 16, 205, 98, 228, 235, 156, 147, 109),
4886 simde_x_mm_set_epu8(157, 143, 244, 146, 49, 180, 145, 221,
4887 80, 205, 154, 228, 235, 213, 147, 109) },
4888 { simde_x_mm_set_epu8(120, 153, 102, 244, 149, 171, 101, 141,
4889 231, 205, 156, 11, 214, 255, 28, 215),
4890 simde_x_mm_set_epu8( 89, 227, 119, 48, 219, 88, 0, 68,
4891 146, 196, 199, 34, 143, 246, 184, 31),
4892 simde_x_mm_set_epu8(120, 227, 119, 244, 219, 171, 101, 141,
4893 231, 205, 199, 34, 214, 255, 184, 215) },
4894 { simde_x_mm_set_epu8(201, 221, 39, 38, 119, 106, 89, 236,
4895 8, 81, 136, 17, 62, 33, 200, 24),
4896 simde_x_mm_set_epu8( 76, 153, 167, 42, 171, 206, 46, 181,
4897 37, 117, 72, 251, 153, 91, 107, 96),
4898 simde_x_mm_set_epu8(201, 221, 167, 42, 171, 206, 89, 236,
4899 37, 117, 136, 251, 153, 91, 200, 96) },
4900 { simde_x_mm_set_epu8( 34, 94, 125, 66, 238, 110, 110, 27,
4901 90, 179, 184, 250, 202, 62, 132, 68),
4902 simde_x_mm_set_epu8(135, 208, 31, 76, 51, 5, 50, 220,
4903 43, 120, 10, 131, 247, 241, 134, 232),
4904 simde_x_mm_set_epu8(135, 208, 125, 76, 238, 110, 110, 220,
4905 90, 179, 184, 250, 247, 241, 134, 232) },
4906 { simde_x_mm_set_epu8( 12, 112, 35, 12, 111, 1, 16, 229,
4907 119, 199, 69, 96, 220, 123, 153, 230),
4908 simde_x_mm_set_epu8(147, 155, 56, 136, 236, 16, 93, 16,
4909 43, 253, 136, 239, 147, 44, 146, 0),
4910 simde_x_mm_set_epu8(147, 155, 56, 136, 236, 16, 93, 229,
4911 119, 253, 136, 239, 220, 123, 153, 230) },
4912 { simde_x_mm_set_epu8(138, 177, 86, 183, 144, 112, 42, 67,
4913 100, 123, 214, 234, 34, 240, 19, 10),
4914 simde_x_mm_set_epu8( 81, 53, 255, 195, 169, 127, 131, 109,
4915 181, 161, 246, 113, 87, 20, 157, 194),
4916 simde_x_mm_set_epu8(138, 177, 255, 195, 169, 127, 131, 109,
4917 181, 161, 246, 234, 87, 240, 157, 194) },
4918 { simde_x_mm_set_epu8( 5, 26, 135, 12, 151, 226, 187, 12,
4919 213, 244, 209, 245, 136, 13, 164, 249),
4920 simde_x_mm_set_epu8( 92, 57, 200, 208, 212, 214, 211, 217,
4921 65, 228, 179, 64, 34, 236, 220, 208),
4922 simde_x_mm_set_epu8( 92, 57, 200, 208, 212, 226, 211, 217,
4923 213, 244, 209, 245, 136, 236, 220, 249) }
4924 };
4925
4926 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4927 simde__m128i r = simde_mm_max_epu8(test_vec[i].a, test_vec[i].b);
4928 simde_assert_m128i_u8(r, ==, test_vec[i].r);
4929 }
4930
4931 return 0;
4932 }
4933
4934 static int
4935 test_simde_mm_max_epi16(SIMDE_MUNIT_TEST_ARGS) {
4936 const struct {
4937 simde__m128i a;
4938 simde__m128i b;
4939 simde__m128i r;
4940 } test_vec[8] = {
4941 { simde_mm_set_epi16(INT16_C(-22312), INT16_C( 29779), INT16_C( 22820), INT16_C( 17963),
4942 INT16_C(-10928), INT16_C(-19834), INT16_C( 17009), INT16_C( -3199)),
4943 simde_mm_set_epi16(INT16_C( 31002), INT16_C( 19924), INT16_C( 15015), INT16_C( 21428),
4944 INT16_C( -5070), INT16_C( 16752), INT16_C( -1820), INT16_C( 22503)),
4945 simde_mm_set_epi16(INT16_C( 31002), INT16_C( 29779), INT16_C( 22820), INT16_C( 21428),
4946 INT16_C( -5070), INT16_C( 16752), INT16_C( 17009), INT16_C( 22503)) },
4947 { simde_mm_set_epi16(INT16_C(-25201), INT16_C( -2926), INT16_C( 12684), INT16_C(-28195),
4948 INT16_C( 20559), INT16_C(-26041), INT16_C( 12757), INT16_C( 26902)),
4949 simde_mm_set_epi16(INT16_C( 20794), INT16_C( 29544), INT16_C( 6836), INT16_C( 16161),
4950 INT16_C( 4301), INT16_C( 25316), INT16_C( -5220), INT16_C(-27795)),
4951 simde_mm_set_epi16(INT16_C( 20794), INT16_C( 29544), INT16_C( 12684), INT16_C( 16161),
4952 INT16_C( 20559), INT16_C( 25316), INT16_C( 12757), INT16_C( 26902)) },
4953 { simde_mm_set_epi16(INT16_C( 30873), INT16_C( 26356), INT16_C(-27221), INT16_C( 25997),
4954 INT16_C( -6195), INT16_C(-25589), INT16_C(-10497), INT16_C( 7383)),
4955 simde_mm_set_epi16(INT16_C( 23011), INT16_C( 30512), INT16_C( -9384), INT16_C( 68),
4956 INT16_C(-27964), INT16_C(-14558), INT16_C(-28682), INT16_C(-18401)),
4957 simde_mm_set_epi16(INT16_C( 30873), INT16_C( 30512), INT16_C( -9384), INT16_C( 25997),
4958 INT16_C( -6195), INT16_C(-14558), INT16_C(-10497), INT16_C( 7383)) },
4959 { simde_mm_set_epi16(INT16_C(-13859), INT16_C( 10022), INT16_C( 30570), INT16_C( 23020),
4960 INT16_C( 2129), INT16_C(-30703), INT16_C( 15905), INT16_C(-14312)),
4961 simde_mm_set_epi16(INT16_C( 19609), INT16_C(-22742), INT16_C(-21554), INT16_C( 11957),
4962 INT16_C( 9589), INT16_C( 18683), INT16_C(-26277), INT16_C( 27488)),
4963 simde_mm_set_epi16(INT16_C( 19609), INT16_C( 10022), INT16_C( 30570), INT16_C( 23020),
4964 INT16_C( 9589), INT16_C( 18683), INT16_C( 15905), INT16_C( 27488)) },
4965 { simde_mm_set_epi16(INT16_C( 8798), INT16_C( 32066), INT16_C( -4498), INT16_C( 28187),
4966 INT16_C( 23219), INT16_C(-18182), INT16_C(-13762), INT16_C(-31676)),
4967 simde_mm_set_epi16(INT16_C(-30768), INT16_C( 8012), INT16_C( 13061), INT16_C( 13020),
4968 INT16_C( 11128), INT16_C( 2691), INT16_C( -2063), INT16_C(-31000)),
4969 simde_mm_set_epi16(INT16_C( 8798), INT16_C( 32066), INT16_C( 13061), INT16_C( 28187),
4970 INT16_C( 23219), INT16_C( 2691), INT16_C( -2063), INT16_C(-31000)) },
4971 { simde_mm_set_epi16(INT16_C( 3184), INT16_C( 8972), INT16_C( 28417), INT16_C( 4325),
4972 INT16_C( 30663), INT16_C( 17760), INT16_C( -9093), INT16_C(-26138)),
4973 simde_mm_set_epi16(INT16_C(-27749), INT16_C( 14472), INT16_C( -5104), INT16_C( 23824),
4974 INT16_C( 11261), INT16_C(-30481), INT16_C(-27860), INT16_C(-28160)),
4975 simde_mm_set_epi16(INT16_C( 3184), INT16_C( 14472), INT16_C( 28417), INT16_C( 23824),
4976 INT16_C( 30663), INT16_C( 17760), INT16_C( -9093), INT16_C(-26138)) },
4977 { simde_mm_set_epi16(INT16_C(-30031), INT16_C( 22199), INT16_C(-28560), INT16_C( 10819),
4978 INT16_C( 25723), INT16_C(-10518), INT16_C( 8944), INT16_C( 4874)),
4979 simde_mm_set_epi16(INT16_C( 20789), INT16_C( -61), INT16_C(-22145), INT16_C(-31891),
4980 INT16_C(-19039), INT16_C( -2447), INT16_C( 22292), INT16_C(-25150)),
4981 simde_mm_set_epi16(INT16_C( 20789), INT16_C( 22199), INT16_C(-22145), INT16_C( 10819),
4982 INT16_C( 25723), INT16_C( -2447), INT16_C( 22292), INT16_C( 4874)) },
4983 { simde_mm_set_epi16(INT16_C( 1306), INT16_C(-30964), INT16_C(-26654), INT16_C(-17652),
4984 INT16_C(-10764), INT16_C(-11787), INT16_C(-30707), INT16_C(-23303)),
4985 simde_mm_set_epi16(INT16_C( 23609), INT16_C(-14128), INT16_C(-11050), INT16_C(-11303),
4986 INT16_C( 16868), INT16_C(-19648), INT16_C( 8940), INT16_C( -9008)),
4987 simde_mm_set_epi16(INT16_C( 23609), INT16_C(-14128), INT16_C(-11050), INT16_C(-11303),
4988 INT16_C( 16868), INT16_C(-11787), INT16_C( 8940), INT16_C( -9008)) }
4989 };
4990
4991 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4992 simde__m128i r = simde_mm_max_epi16(test_vec[i].a, test_vec[i].b);
4993 simde_assert_m128i_i16(r, ==, test_vec[i].r);
4994 }
4995
4996 return 0;
4997 }
4998
4999 static int
5000 test_simde_mm_max_pd(SIMDE_MUNIT_TEST_ARGS) {
5001 const struct {
5002 simde__m128d a;
5003 simde__m128d b;
5004 simde__m128d r;
5005 } test_vec[8] = {
5006 { simde_mm_set_pd(SIMDE_FLOAT64_C( -303.58), SIMDE_FLOAT64_C( -480.90)),
5007 simde_mm_set_pd(SIMDE_FLOAT64_C( 319.11), SIMDE_FLOAT64_C( 666.53)),
5008 simde_mm_set_pd(SIMDE_FLOAT64_C( 319.11), SIMDE_FLOAT64_C( 666.53)) },
5009 { simde_mm_set_pd(SIMDE_FLOAT64_C( -541.77), SIMDE_FLOAT64_C( 944.47)),
5010 simde_mm_set_pd(SIMDE_FLOAT64_C( -53.88), SIMDE_FLOAT64_C( 845.28)),
5011 simde_mm_set_pd(SIMDE_FLOAT64_C( -53.88), SIMDE_FLOAT64_C( 944.47)) },
5012 { simde_mm_set_pd(SIMDE_FLOAT64_C( -612.90), SIMDE_FLOAT64_C( -610.67)),
5013 simde_mm_set_pd(SIMDE_FLOAT64_C( 230.96), SIMDE_FLOAT64_C( -372.57)),
5014 simde_mm_set_pd(SIMDE_FLOAT64_C( 230.96), SIMDE_FLOAT64_C( -372.57)) },
5015 { simde_mm_set_pd(SIMDE_FLOAT64_C( -791.37), SIMDE_FLOAT64_C( 840.72)),
5016 simde_mm_set_pd(SIMDE_FLOAT64_C( -365.40), SIMDE_FLOAT64_C( -868.73)),
5017 simde_mm_set_pd(SIMDE_FLOAT64_C( -365.40), SIMDE_FLOAT64_C( 840.72)) },
5018 { simde_mm_set_pd(SIMDE_FLOAT64_C( 169.29), SIMDE_FLOAT64_C( 679.66)),
5019 simde_mm_set_pd(SIMDE_FLOAT64_C( -57.82), SIMDE_FLOAT64_C( 810.96)),
5020 simde_mm_set_pd(SIMDE_FLOAT64_C( 169.29), SIMDE_FLOAT64_C( 810.96)) },
5021 { simde_mm_set_pd(SIMDE_FLOAT64_C( 713.62), SIMDE_FLOAT64_C( 124.72)),
5022 simde_mm_set_pd(SIMDE_FLOAT64_C( -297.75), SIMDE_FLOAT64_C( 146.63)),
5023 simde_mm_set_pd(SIMDE_FLOAT64_C( 713.62), SIMDE_FLOAT64_C( 146.63)) },
5024 { simde_mm_set_pd(SIMDE_FLOAT64_C( -67.07), SIMDE_FLOAT64_C( -514.59)),
5025 simde_mm_set_pd(SIMDE_FLOAT64_C( 577.06), SIMDE_FLOAT64_C( -935.01)),
5026 simde_mm_set_pd(SIMDE_FLOAT64_C( 577.06), SIMDE_FLOAT64_C( -514.59)) },
5027 { simde_mm_set_pd(SIMDE_FLOAT64_C( 342.23), SIMDE_FLOAT64_C( 198.10)),
5028 simde_mm_set_pd(SIMDE_FLOAT64_C( -401.56), SIMDE_FLOAT64_C( -707.36)),
5029 simde_mm_set_pd(SIMDE_FLOAT64_C( 342.23), SIMDE_FLOAT64_C( 198.10)) }
5030 };
5031
5032 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5033 simde__m128d r = simde_mm_max_pd(test_vec[i].a, test_vec[i].b);
5034 simde_assert_m128d_close(r, test_vec[i].r, 1);
5035 }
5036
5037 return 0;
5038 }
5039
5040 static int
5041 test_simde_mm_max_sd(SIMDE_MUNIT_TEST_ARGS) {
5042 const struct {
5043 simde__m128d a;
5044 simde__m128d b;
5045 simde__m128d r;
5046 } test_vec[8] = {
5047 { simde_mm_set_pd(SIMDE_FLOAT64_C( -303.58), SIMDE_FLOAT64_C( -480.90)),
5048 simde_mm_set_pd(SIMDE_FLOAT64_C( 319.11), SIMDE_FLOAT64_C( 666.53)),
5049 simde_mm_set_pd(SIMDE_FLOAT64_C( -303.58), SIMDE_FLOAT64_C( 666.53)) },
5050 { simde_mm_set_pd(SIMDE_FLOAT64_C( -541.77), SIMDE_FLOAT64_C( 944.47)),
5051 simde_mm_set_pd(SIMDE_FLOAT64_C( -53.88), SIMDE_FLOAT64_C( 845.28)),
5052 simde_mm_set_pd(SIMDE_FLOAT64_C( -541.77), SIMDE_FLOAT64_C( 944.47)) },
5053 { simde_mm_set_pd(SIMDE_FLOAT64_C( -612.90), SIMDE_FLOAT64_C( -610.67)),
5054 simde_mm_set_pd(SIMDE_FLOAT64_C( 230.96), SIMDE_FLOAT64_C( -372.57)),
5055 simde_mm_set_pd(SIMDE_FLOAT64_C( -612.90), SIMDE_FLOAT64_C( -372.57)) },
5056 { simde_mm_set_pd(SIMDE_FLOAT64_C( -791.37), SIMDE_FLOAT64_C( 840.72)),
5057 simde_mm_set_pd(SIMDE_FLOAT64_C( -365.40), SIMDE_FLOAT64_C( -868.73)),
5058 simde_mm_set_pd(SIMDE_FLOAT64_C( -791.37), SIMDE_FLOAT64_C( 840.72)) },
5059 { simde_mm_set_pd(SIMDE_FLOAT64_C( 169.29), SIMDE_FLOAT64_C( 679.66)),
5060 simde_mm_set_pd(SIMDE_FLOAT64_C( -57.82), SIMDE_FLOAT64_C( 810.96)),
5061 simde_mm_set_pd(SIMDE_FLOAT64_C( 169.29), SIMDE_FLOAT64_C( 810.96)) },
5062 { simde_mm_set_pd(SIMDE_FLOAT64_C( 713.62), SIMDE_FLOAT64_C( 124.72)),
5063 simde_mm_set_pd(SIMDE_FLOAT64_C( -297.75), SIMDE_FLOAT64_C( 146.63)),
5064 simde_mm_set_pd(SIMDE_FLOAT64_C( 713.62), SIMDE_FLOAT64_C( 146.63)) },
5065 { simde_mm_set_pd(SIMDE_FLOAT64_C( -67.07), SIMDE_FLOAT64_C( -514.59)),
5066 simde_mm_set_pd(SIMDE_FLOAT64_C( 577.06), SIMDE_FLOAT64_C( -935.01)),
5067 simde_mm_set_pd(SIMDE_FLOAT64_C( -67.07), SIMDE_FLOAT64_C( -514.59)) },
5068 { simde_mm_set_pd(SIMDE_FLOAT64_C( 342.23), SIMDE_FLOAT64_C( 198.10)),
5069 simde_mm_set_pd(SIMDE_FLOAT64_C( -401.56), SIMDE_FLOAT64_C( -707.36)),
5070 simde_mm_set_pd(SIMDE_FLOAT64_C( 342.23), SIMDE_FLOAT64_C( 198.10)) }
5071 };
5072
5073 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5074 simde__m128d r = simde_mm_max_sd(test_vec[i].a, test_vec[i].b);
5075 simde_assert_m128d_close(r, test_vec[i].r, 1);
5076 }
5077
5078 return 0;
5079 }
5080
5081 static int
5082 test_simde_mm_move_epi64(SIMDE_MUNIT_TEST_ARGS) {
5083 const struct {
5084 simde__m128i a;
5085 simde__m128i r;
5086 } test_vec[8] = {
5087 { simde_mm_set_epi64x(INT64_C(-2982745844705455901), INT64_C( 4775804171231816037)),
5088 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 4775804171231816037)) },
5089 { simde_mm_set_epi64x(INT64_C( 5762346410957661033), INT64_C( 2977172799723381810)),
5090 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 2977172799723381810)) },
5091 { simde_mm_set_epi64x(INT64_C( 1008079402021318109), INT64_C( 2502061726771043310)),
5092 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 2502061726771043310)) },
5093 { simde_mm_set_epi64x(INT64_C( 5339677830223010942), INT64_C( 8124798084034539527)),
5094 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 8124798084034539527)) },
5095 { simde_mm_set_epi64x(INT64_C(-4521066662096167363), INT64_C( -947809468227977762)),
5096 simde_mm_set_epi64x(INT64_C( 0), INT64_C( -947809468227977762)) },
5097 { simde_mm_set_epi64x(INT64_C(-1218747510360922612), INT64_C( 3122441631876631480)),
5098 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 3122441631876631480)) },
5099 { simde_mm_set_epi64x(INT64_C(-3523922424397514946), INT64_C( -108841976580709576)),
5100 simde_mm_set_epi64x(INT64_C( 0), INT64_C( -108841976580709576)) },
5101 { simde_mm_set_epi64x(INT64_C(-3961221708434347271), INT64_C(-1875395594913971276)),
5102 simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1875395594913971276)) }
5103 };
5104
5105 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5106 simde__m128i r = simde_mm_move_epi64(test_vec[i].a);
5107 simde_assert_m128i_i64(r, ==, test_vec[i].r);
5108 }
5109
5110 return 0;
5111 }
5112
5113 static int
5114 test_simde_mm_move_sd(SIMDE_MUNIT_TEST_ARGS) {
5115 const struct {
5116 simde__m128d a;
5117 simde__m128d b;
5118 simde__m128d r;
5119 } test_vec[8] = {
5120 { simde_mm_set_pd(SIMDE_FLOAT64_C( 570.81), SIMDE_FLOAT64_C( -941.79)),
5121 simde_mm_set_pd(SIMDE_FLOAT64_C( -875.32), SIMDE_FLOAT64_C( -356.30)),
5122 simde_mm_set_pd(SIMDE_FLOAT64_C( 570.81), SIMDE_FLOAT64_C( -356.30)) },
5123 { simde_mm_set_pd(SIMDE_FLOAT64_C( -816.08), SIMDE_FLOAT64_C( 126.72)),
5124 simde_mm_set_pd(SIMDE_FLOAT64_C( -886.56), SIMDE_FLOAT64_C( 985.06)),
5125 simde_mm_set_pd(SIMDE_FLOAT64_C( -816.08), SIMDE_FLOAT64_C( 985.06)) },
5126 { simde_mm_set_pd(SIMDE_FLOAT64_C( 358.01), SIMDE_FLOAT64_C( 59.13)),
5127 simde_mm_set_pd(SIMDE_FLOAT64_C( -61.40), SIMDE_FLOAT64_C( -717.39)),
5128 simde_mm_set_pd(SIMDE_FLOAT64_C( 358.01), SIMDE_FLOAT64_C( -717.39)) },
5129 { simde_mm_set_pd(SIMDE_FLOAT64_C( 110.04), SIMDE_FLOAT64_C( -44.09)),
5130 simde_mm_set_pd(SIMDE_FLOAT64_C( -614.96), SIMDE_FLOAT64_C( -267.93)),
5131 simde_mm_set_pd(SIMDE_FLOAT64_C( 110.04), SIMDE_FLOAT64_C( -267.93)) },
5132 { simde_mm_set_pd(SIMDE_FLOAT64_C( 571.79), SIMDE_FLOAT64_C( -816.23)),
5133 simde_mm_set_pd(SIMDE_FLOAT64_C( 917.45), SIMDE_FLOAT64_C( 287.41)),
5134 simde_mm_set_pd(SIMDE_FLOAT64_C( 571.79), SIMDE_FLOAT64_C( 287.41)) },
5135 { simde_mm_set_pd(SIMDE_FLOAT64_C( 827.50), SIMDE_FLOAT64_C( 261.09)),
5136 simde_mm_set_pd(SIMDE_FLOAT64_C( 478.77), SIMDE_FLOAT64_C( 33.99)),
5137 simde_mm_set_pd(SIMDE_FLOAT64_C( 827.50), SIMDE_FLOAT64_C( 33.99)) },
5138 { simde_mm_set_pd(SIMDE_FLOAT64_C( -335.82), SIMDE_FLOAT64_C( 465.36)),
5139 simde_mm_set_pd(SIMDE_FLOAT64_C( -993.24), SIMDE_FLOAT64_C( 100.89)),
5140 simde_mm_set_pd(SIMDE_FLOAT64_C( -335.82), SIMDE_FLOAT64_C( 100.89)) },
5141 { simde_mm_set_pd(SIMDE_FLOAT64_C( 415.58), SIMDE_FLOAT64_C( -984.83)),
5142 simde_mm_set_pd(SIMDE_FLOAT64_C( 764.57), SIMDE_FLOAT64_C( 672.72)),
5143 simde_mm_set_pd(SIMDE_FLOAT64_C( 415.58), SIMDE_FLOAT64_C( 672.72)) }
5144 };
5145
5146 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5147 simde__m128d r = simde_mm_move_sd(test_vec[i].a, test_vec[i].b);
5148 simde_assert_m128d_close(r, test_vec[i].r, 1);
5149 }
5150
5151 return 0;
5152 }
5153
5154 static int
5155 test_simde_mm_movemask_epi8(SIMDE_MUNIT_TEST_ARGS) {
5156 const struct {
5157 simde__m128i a;
5158 int32_t r;
5159 } test_vec[] = {
5160 { simde_mm_set_epi8(INT8_C(-125), INT8_C( -40), INT8_C( -7), INT8_C( -71),
5161 INT8_C( -75), INT8_C( 99), INT8_C( 101), INT8_C( -5),
5162 INT8_C( -71), INT8_C( -91), INT8_C( -60), INT8_C( 9),
5163 INT8_C( -27), INT8_C( -81), INT8_C( 5), INT8_C( 97)),
5164 INT32_C(63980) },
5165 { simde_mm_set_epi8(INT8_C( 44), INT8_C( -98), INT8_C( 82), INT8_C(-127),
5166 INT8_C( -28), INT8_C( 122), INT8_C( -22), INT8_C( 46),
5167 INT8_C( -41), INT8_C( -35), INT8_C( 15), INT8_C( 43),
5168 INT8_C( -37), INT8_C( -12), INT8_C( 17), INT8_C( -17)),
5169 INT32_C(23245) },
5170 { simde_mm_set_epi8(INT8_C( -53), INT8_C( -99), INT8_C( 91), INT8_C( -56),
5171 INT8_C( 10), INT8_C( 114), INT8_C(-120), INT8_C( 67),
5172 INT8_C( -82), INT8_C( 13), INT8_C( 104), INT8_C( 1),
5173 INT8_C( 15), INT8_C(-115), INT8_C( 16), INT8_C( 33)),
5174 INT32_C(53892) },
5175 { simde_mm_set_epi8(INT8_C( 109), INT8_C( -5), INT8_C( -45), INT8_C( 60),
5176 INT8_C( -20), INT8_C( -7), INT8_C( -24), INT8_C( 63),
5177 INT8_C( 61), INT8_C( -94), INT8_C(-110), INT8_C( 16),
5178 INT8_C( 117), INT8_C( -23), INT8_C( -49), INT8_C( -74)),
5179 INT32_C(28263) },
5180 { simde_mm_set_epi8(INT8_C( -88), INT8_C( 110), INT8_C(-108), INT8_C( -88),
5181 INT8_C( 28), INT8_C( 110), INT8_C( 0), INT8_C( -12),
5182 INT8_C( -90), INT8_C( 44), INT8_C( -42), INT8_C( -87),
5183 INT8_C( -48), INT8_C( -87), INT8_C( -21), INT8_C( -64)),
5184 INT32_C(45503) },
5185 { simde_mm_set_epi8(INT8_C( 121), INT8_C(-111), INT8_C( -1), INT8_C( -61),
5186 INT8_C( 67), INT8_C( 90), INT8_C( 10), INT8_C( 65),
5187 INT8_C( 36), INT8_C( -60), INT8_C( 93), INT8_C( -3),
5188 INT8_C(-112), INT8_C( -8), INT8_C( 55), INT8_C( -49)),
5189 INT32_C(28765) },
5190 { simde_mm_set_epi8(INT8_C( 92), INT8_C( -27), INT8_C( 37), INT8_C( -87),
5191 INT8_C( 58), INT8_C( 108), INT8_C( -50), INT8_C( -10),
5192 INT8_C( 5), INT8_C( 21), INT8_C( 14), INT8_C( 72),
5193 INT8_C( -76), INT8_C( 21), INT8_C(-104), INT8_C( 110)),
5194 INT32_C(21258) },
5195 { simde_mm_set_epi8(INT8_C( -60), INT8_C( 23), INT8_C( -54), INT8_C( 54),
5196 INT8_C( 31), INT8_C( 13), INT8_C( -93), INT8_C( 18),
5197 INT8_C( -62), INT8_C(-128), INT8_C( 70), INT8_C( 59),
5198 INT8_C( 17), INT8_C( 49), INT8_C( 95), INT8_C( -96)),
5199 INT32_C(41665) }
5200 };
5201
5202 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5203 int32_t r = simde_mm_movemask_epi8(test_vec[i].a);
5204 simde_assert_equal_i32(r, test_vec[i].r);
5205 }
5206
5207 return 0;
5208 }
5209
5210 static int
5211 test_simde_mm_movemask_pd(SIMDE_MUNIT_TEST_ARGS) {
5212 const struct {
5213 simde__m128d a;
5214 int32_t r;
5215 } test_vec[8] = {
5216 { simde_mm_set_pd(SIMDE_FLOAT64_C( -532.45), SIMDE_FLOAT64_C( 863.01)),
5217 INT32_C(2) },
5218 { simde_mm_set_pd(SIMDE_FLOAT64_C( -749.24), SIMDE_FLOAT64_C( -869.97)),
5219 INT32_C(3) },
5220 { simde_mm_set_pd(SIMDE_FLOAT64_C( 994.23), SIMDE_FLOAT64_C( 351.47)),
5221 INT32_C(0) },
5222 { simde_mm_set_pd(SIMDE_FLOAT64_C( 413.53), SIMDE_FLOAT64_C( -655.32)),
5223 INT32_C(1) },
5224 { simde_mm_set_pd(SIMDE_FLOAT64_C( -253.74), SIMDE_FLOAT64_C( -2.37)),
5225 INT32_C(3) },
5226 { simde_mm_set_pd(SIMDE_FLOAT64_C( 696.22), SIMDE_FLOAT64_C( -699.75)),
5227 INT32_C(1) },
5228 { simde_mm_set_pd(SIMDE_FLOAT64_C( 55.24), SIMDE_FLOAT64_C( -722.45)),
5229 INT32_C(1) },
5230 { simde_mm_set_pd(SIMDE_FLOAT64_C( -592.60), SIMDE_FLOAT64_C( 141.10)),
5231 INT32_C(2) }
5232 };
5233
5234 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5235 int32_t r = simde_mm_movemask_pd(test_vec[i].a);
5236 simde_assert_equal_i32(r, test_vec[i].r);
5237 }
5238
5239 return 0;
5240 }
5241
5242 static int
5243 test_simde_mm_movepi64_pi64(SIMDE_MUNIT_TEST_ARGS) {
5244 const struct {
5245 simde__m128i a;
5246 simde__m64 r;
5247 } test_vec[8] = {
5248 { simde_mm_set_epi64x(INT64_C(-3369091548753669372), INT64_C(-3862634862308997761)),
5249 simde_x_mm_set_pi64(INT64_C(-3862634862308997761)) },
5250 { simde_mm_set_epi64x(INT64_C( 1195923961730132400), INT64_C(-4203048506958717476)),
5251 simde_x_mm_set_pi64(INT64_C(-4203048506958717476)) },
5252 { simde_mm_set_epi64x(INT64_C( 4316262850566382732), INT64_C( 743544812785944809)),
5253 simde_x_mm_set_pi64(INT64_C(743544812785944809)) },
5254 { simde_mm_set_epi64x(INT64_C( 9153964415619232912), INT64_C( 7102186508934354546)),
5255 simde_x_mm_set_pi64(INT64_C(7102186508934354546)) },
5256 { simde_mm_set_epi64x(INT64_C( -149536427124813706), INT64_C(-2645616526676309339)),
5257 simde_x_mm_set_pi64(INT64_C(-2645616526676309339)) },
5258 { simde_mm_set_epi64x(INT64_C( 7660292028637459230), INT64_C(-4472173852492382560)),
5259 simde_x_mm_set_pi64(INT64_C(-4472173852492382560)) },
5260 { simde_mm_set_epi64x(INT64_C( 2373412759770157312), INT64_C( -249935199655019513)),
5261 simde_x_mm_set_pi64(INT64_C(-249935199655019513)) },
5262 { simde_mm_set_epi64x(INT64_C(-2495482311671930573), INT64_C( 7782795372632782061)),
5263 simde_x_mm_set_pi64(INT64_C(7782795372632782061)) }
5264 };
5265
5266 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5267 simde__m64 r = simde_mm_movepi64_pi64(test_vec[i].a);
5268 simde_assert_m64_i64(r, ==, test_vec[i].r);
5269 }
5270
5271 return 0;
5272 }
5273
5274 static int
5275 test_simde_mm_movpi64_epi64(SIMDE_MUNIT_TEST_ARGS) {
5276 const struct {
5277 simde__m64 a;
5278 simde__m128i r;
5279 } test_vec[8] = {
5280 { simde_x_mm_set_pi64(INT64_C(8307669974137432024)),
5281 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 8307669974137432024)) },
5282 { simde_x_mm_set_pi64(INT64_C(-6174863101947913477)),
5283 simde_mm_set_epi64x(INT64_C( 0), INT64_C(-6174863101947913477)) },
5284 { simde_x_mm_set_pi64(INT64_C(-3709498539865079997)),
5285 simde_mm_set_epi64x(INT64_C( 0), INT64_C(-3709498539865079997)) },
5286 { simde_x_mm_set_pi64(INT64_C(-5655514474221449119)),
5287 simde_mm_set_epi64x(INT64_C( 0), INT64_C(-5655514474221449119)) },
5288 { simde_x_mm_set_pi64(INT64_C(-4407711847161442183)),
5289 simde_mm_set_epi64x(INT64_C( 0), INT64_C(-4407711847161442183)) },
5290 { simde_x_mm_set_pi64(INT64_C(-7730135383563833284)),
5291 simde_mm_set_epi64x(INT64_C( 0), INT64_C(-7730135383563833284)) },
5292 { simde_x_mm_set_pi64(INT64_C(1417829150564629578)),
5293 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 1417829150564629578)) },
5294 { simde_x_mm_set_pi64(INT64_C(5667864625160412978)),
5295 simde_mm_set_epi64x(INT64_C( 0), INT64_C( 5667864625160412978)) }
5296 };
5297
5298 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5299 simde__m128i r = simde_mm_movpi64_epi64(test_vec[i].a);
5300 simde_assert_m128i_i64(r, ==, test_vec[i].r);
5301 }
5302
5303 return 0;
5304 }
5305
5306 static int
5307 test_simde_mm_mul_epu32(SIMDE_MUNIT_TEST_ARGS) {
5308 const struct {
5309 simde__m128i a;
5310 simde__m128i b;
5311 simde__m128i r;
5312 } test_vec[8] = {
5313 { simde_x_mm_set_epu32 (UINT32_C(1251634950), UINT32_C( 3010014325), UINT32_C( 3844926313), UINT32_C( 887004237)),
5314 simde_x_mm_set_epu32 (UINT32_C( 771412494), UINT32_C( 328239887), UINT32_C( 3360452571), UINT32_C( 298292680)),
5315 simde_x_mm_set_epu64x(UINT64_C( 988006761906381275), UINT64_C( 264586871026085160)) },
5316 { simde_x_mm_set_epu32 (UINT32_C(4076207020), UINT32_C( 368393187), UINT32_C( 3498508084), UINT32_C( 981141316)),
5317 simde_x_mm_set_epu32 (UINT32_C( 81654802), UINT32_C( 140153335), UINT32_C( 2110173535), UINT32_C( 3134478151)),
5318 simde_x_mm_set_epu64x(UINT64_C( 51631533749328645), UINT64_C( 3075366018045386716)) },
5319 { simde_x_mm_set_epu32 (UINT32_C(4261415154), UINT32_C( 846454649), UINT32_C( 108194122), UINT32_C( 4167432393)),
5320 simde_x_mm_set_epu32 (UINT32_C(2698880481), UINT32_C( 1287129030), UINT32_C( 2616406220), UINT32_C( 1248265871)),
5321 simde_x_mm_set_epu64x(UINT64_C( 1089496351306360470), UINT64_C( 5202063625881759303)) },
5322 { simde_x_mm_set_epu32 (UINT32_C( 911193301), UINT32_C( 1110766386), UINT32_C( 3009613617), UINT32_C( 1645784878)),
5323 simde_x_mm_set_epu32 (UINT32_C(3094480659), UINT32_C( 3697181600), UINT32_C( 4236850839), UINT32_C( 2133678416)),
5324 simde_x_mm_set_epu64x(UINT64_C( 4106705044217697600), UINT64_C( 3511575671567793248)) },
5325 { simde_x_mm_set_epu32 (UINT32_C(2154112155), UINT32_C( 2960710803), UINT32_C( 2851801912), UINT32_C( 678710951)),
5326 simde_x_mm_set_epu32 (UINT32_C(4001207654), UINT32_C( 4056994829), UINT32_C( 1341523746), UINT32_C( 568161818)),
5327 simde_x_mm_set_epu64x(UINT64_C(12011588417935437687), UINT64_C( 385617647816668918)) },
5328 { simde_x_mm_set_epu32 (UINT32_C(3273494172), UINT32_C( 3612698350), UINT32_C( 4103906203), UINT32_C( 1678207566)),
5329 simde_x_mm_set_epu32 (UINT32_C( 48120942), UINT32_C( 160747207), UINT32_C( 2820564214), UINT32_C( 1404181744)),
5330 simde_x_mm_set_epu64x(UINT64_C( 580731169496008450), UINT64_C( 2356508426819875104)) },
5331 { simde_x_mm_set_epu32 (UINT32_C( 396392525), UINT32_C( 2486526122), UINT32_C( 1177281917), UINT32_C( 3038155803)),
5332 simde_x_mm_set_epu32 (UINT32_C( 760783698), UINT32_C( 1253190575), UINT32_C( 4064848310), UINT32_C( 1630883223)),
5333 simde_x_mm_set_epu64x(UINT64_C( 3116091100581700150), UINT64_C( 4954877327972793069)) },
5334 { simde_x_mm_set_epu32 (UINT32_C(1438827395), UINT32_C( 1294325524), UINT32_C( 3245229436), UINT32_C( 122146781)),
5335 simde_x_mm_set_epu32 (UINT32_C(1030238038), UINT32_C( 4273209339), UINT32_C( 197838277), UINT32_C( 1151380764)),
5336 simde_x_mm_set_epu64x(UINT64_C( 5530923916862868636), UINT64_C( 140637454027920684)) }
5337 };
5338
5339 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5340 simde__m128i r = simde_mm_mul_epu32(test_vec[i].a, test_vec[i].b);
5341 simde_assert_m128i_u64(r, ==, test_vec[i].r);
5342 }
5343
5344 return 0;
5345 }
5346
5347 static int
5348 test_simde_mm_mul_pd(SIMDE_MUNIT_TEST_ARGS) {
5349 const struct {
5350 simde__m128d a;
5351 simde__m128d b;
5352 simde__m128d r;
5353 } test_vec[8] = {
5354 { simde_mm_set_pd(SIMDE_FLOAT64_C( 602.19), SIMDE_FLOAT64_C( -103.53)),
5355 simde_mm_set_pd(SIMDE_FLOAT64_C( -952.79), SIMDE_FLOAT64_C( -150.84)),
5356 simde_mm_set_pd(SIMDE_FLOAT64_C(-573760.61), SIMDE_FLOAT64_C( 15616.47)) },
5357 { simde_mm_set_pd(SIMDE_FLOAT64_C( -140.84), SIMDE_FLOAT64_C( -241.95)),
5358 simde_mm_set_pd(SIMDE_FLOAT64_C( 540.86), SIMDE_FLOAT64_C( -754.39)),
5359 simde_mm_set_pd(SIMDE_FLOAT64_C( -76174.72), SIMDE_FLOAT64_C( 182524.66)) },
5360 { simde_mm_set_pd(SIMDE_FLOAT64_C( -648.21), SIMDE_FLOAT64_C( -612.11)),
5361 simde_mm_set_pd(SIMDE_FLOAT64_C( -327.08), SIMDE_FLOAT64_C( -865.34)),
5362 simde_mm_set_pd(SIMDE_FLOAT64_C( 212016.53), SIMDE_FLOAT64_C( 529683.27)) },
5363 { simde_mm_set_pd(SIMDE_FLOAT64_C( 518.61), SIMDE_FLOAT64_C( -573.43)),
5364 simde_mm_set_pd(SIMDE_FLOAT64_C( -650.79), SIMDE_FLOAT64_C( 196.03)),
5365 simde_mm_set_pd(SIMDE_FLOAT64_C(-337506.20), SIMDE_FLOAT64_C(-112409.48)) },
5366 { simde_mm_set_pd(SIMDE_FLOAT64_C( -750.40), SIMDE_FLOAT64_C( 324.63)),
5367 simde_mm_set_pd(SIMDE_FLOAT64_C( 343.74), SIMDE_FLOAT64_C( -4.14)),
5368 simde_mm_set_pd(SIMDE_FLOAT64_C(-257942.50), SIMDE_FLOAT64_C( -1343.97)) },
5369 { simde_mm_set_pd(SIMDE_FLOAT64_C( -48.73), SIMDE_FLOAT64_C( 769.19)),
5370 simde_mm_set_pd(SIMDE_FLOAT64_C( 268.16), SIMDE_FLOAT64_C( -953.46)),
5371 simde_mm_set_pd(SIMDE_FLOAT64_C( -13067.44), SIMDE_FLOAT64_C(-733391.90)) },
5372 { simde_mm_set_pd(SIMDE_FLOAT64_C( 188.02), SIMDE_FLOAT64_C( 614.87)),
5373 simde_mm_set_pd(SIMDE_FLOAT64_C( 396.91), SIMDE_FLOAT64_C( -399.68)),
5374 simde_mm_set_pd(SIMDE_FLOAT64_C( 74627.02), SIMDE_FLOAT64_C(-245751.24)) },
5375 { simde_mm_set_pd(SIMDE_FLOAT64_C( 813.52), SIMDE_FLOAT64_C( 480.96)),
5376 simde_mm_set_pd(SIMDE_FLOAT64_C( 664.31), SIMDE_FLOAT64_C( 447.07)),
5377 simde_mm_set_pd(SIMDE_FLOAT64_C( 540429.47), SIMDE_FLOAT64_C( 215022.79)) }
5378 };
5379
5380 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5381 simde__m128d r = simde_mm_mul_pd(test_vec[i].a, test_vec[i].b);
5382 simde_assert_m128d_close(r, test_vec[i].r, 1);
5383 }
5384
5385 return 0;
5386 }
5387
5388 static int
5389 test_simde_mm_mul_sd(SIMDE_MUNIT_TEST_ARGS) {
5390 const struct {
5391 simde__m128d a;
5392 simde__m128d b;
5393 simde__m128d r;
5394 } test_vec[8] = {
5395 { simde_mm_set_pd(SIMDE_FLOAT64_C( 815.66), SIMDE_FLOAT64_C( 839.23)),
5396 simde_mm_set_pd(SIMDE_FLOAT64_C( 748.66), SIMDE_FLOAT64_C( -52.12)),
5397 simde_mm_set_pd(SIMDE_FLOAT64_C( 815.66), SIMDE_FLOAT64_C( -43740.67)) },
5398 { simde_mm_set_pd(SIMDE_FLOAT64_C( -714.36), SIMDE_FLOAT64_C( -808.00)),
5399 simde_mm_set_pd(SIMDE_FLOAT64_C( 401.75), SIMDE_FLOAT64_C( 319.13)),
5400 simde_mm_set_pd(SIMDE_FLOAT64_C( -714.36), SIMDE_FLOAT64_C(-257857.04)) },
5401 { simde_mm_set_pd(SIMDE_FLOAT64_C( 453.36), SIMDE_FLOAT64_C( -764.44)),
5402 simde_mm_set_pd(SIMDE_FLOAT64_C( -934.41), SIMDE_FLOAT64_C( -454.88)),
5403 simde_mm_set_pd(SIMDE_FLOAT64_C( 453.36), SIMDE_FLOAT64_C( 347728.47)) },
5404 { simde_mm_set_pd(SIMDE_FLOAT64_C( 489.60), SIMDE_FLOAT64_C( 724.31)),
5405 simde_mm_set_pd(SIMDE_FLOAT64_C( -101.25), SIMDE_FLOAT64_C( 196.93)),
5406 simde_mm_set_pd(SIMDE_FLOAT64_C( 489.60), SIMDE_FLOAT64_C( 142638.37)) },
5407 { simde_mm_set_pd(SIMDE_FLOAT64_C( 187.03), SIMDE_FLOAT64_C( 665.07)),
5408 simde_mm_set_pd(SIMDE_FLOAT64_C( 487.15), SIMDE_FLOAT64_C( 851.16)),
5409 simde_mm_set_pd(SIMDE_FLOAT64_C( 187.03), SIMDE_FLOAT64_C( 566080.98)) },
5410 { simde_mm_set_pd(SIMDE_FLOAT64_C( 589.48), SIMDE_FLOAT64_C( 648.27)),
5411 simde_mm_set_pd(SIMDE_FLOAT64_C( -683.48), SIMDE_FLOAT64_C( -59.67)),
5412 simde_mm_set_pd(SIMDE_FLOAT64_C( 589.48), SIMDE_FLOAT64_C( -38682.27)) },
5413 { simde_mm_set_pd(SIMDE_FLOAT64_C( 838.61), SIMDE_FLOAT64_C( 822.18)),
5414 simde_mm_set_pd(SIMDE_FLOAT64_C( -364.43), SIMDE_FLOAT64_C( 962.26)),
5415 simde_mm_set_pd(SIMDE_FLOAT64_C( 838.61), SIMDE_FLOAT64_C( 791150.93)) },
5416 { simde_mm_set_pd(SIMDE_FLOAT64_C( 530.83), SIMDE_FLOAT64_C( 379.76)),
5417 simde_mm_set_pd(SIMDE_FLOAT64_C( 27.92), SIMDE_FLOAT64_C( -56.09)),
5418 simde_mm_set_pd(SIMDE_FLOAT64_C( 530.83), SIMDE_FLOAT64_C( -21300.74)) }
5419 };
5420
5421 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5422 simde__m128d r = simde_mm_mul_sd(test_vec[i].a, test_vec[i].b);
5423 simde_assert_m128d_close(r, test_vec[i].r, 1);
5424 }
5425
5426 return 0;
5427 }
5428
5429 static int
5430 test_simde_mm_mul_su32(SIMDE_MUNIT_TEST_ARGS) {
5431 const struct {
5432 simde__m64 a;
5433 simde__m64 b;
5434 simde__m64 r;
5435 } test_vec[8] = {
5436 { simde_x_mm_set_pu32(UINT32_C(3055040779), UINT32_C( 899100968)),
5437 simde_x_mm_set_pu32(UINT32_C(1940650668), UINT32_C(3777451497)),
5438 simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 3396310297525749096))) },
5439 { simde_x_mm_set_pu32(UINT32_C(2705843438), UINT32_C(2434885276)),
5440 simde_x_mm_set_pu32(UINT32_C(3024316392), UINT32_C(3861898348)),
5441 simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 9403279424953924048))) },
5442 { simde_x_mm_set_pu32(UINT32_C(3766308026), UINT32_C(1712773120)),
5443 simde_x_mm_set_pu32(UINT32_C( 817218479), UINT32_C(3651399110)),
5444 simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 6254018245999923200))) },
5445 { simde_x_mm_set_pu32(UINT32_C( 434012470), UINT32_C(1054365092)),
5446 simde_x_mm_set_pu32(UINT32_C(2682784668), UINT32_C(2536059630)),
5447 simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 2673932745102435960))) },
5448 { simde_x_mm_set_pu32(UINT32_C(3086788421), UINT32_C( 996821946)),
5449 simde_x_mm_set_pu32(UINT32_C(3201780597), UINT32_C(3958985305)),
5450 simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 3946403435915503530))) },
5451 { simde_x_mm_set_pu32(UINT32_C(3277786031), UINT32_C(4257890741)),
5452 simde_x_mm_set_pu32(UINT32_C(1195509971), UINT32_C(2579552899)),
5453 simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C(10983454404571808159))) },
5454 { simde_x_mm_set_pu32(UINT32_C(3106450314), UINT32_C(1125697671)),
5455 simde_x_mm_set_pu32(UINT32_C(2878635182), UINT32_C(3892244414)),
5456 simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 4381490471802559794))) },
5457 { simde_x_mm_set_pu32(UINT32_C(2670515723), UINT32_C(3917703761)),
5458 simde_x_mm_set_pu32(UINT32_C(3656211314), UINT32_C(2327792170)),
5459 simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 9119600139235351370))) }
5460 };
5461
5462 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5463 simde__m64 r = simde_mm_mul_su32(test_vec[i].a, test_vec[i].b);
5464 simde_assert_m64_u64(r, ==, test_vec[i].r);
5465 }
5466
5467 return 0;
5468 }
5469
5470 static int
5471 test_simde_mm_mulhi_epi16(SIMDE_MUNIT_TEST_ARGS) {
5472 const struct {
5473 simde__m128i a;
5474 simde__m128i b;
5475 simde__m128i r;
5476 } test_vec[8] = {
5477 { simde_mm_set_epi16(INT16_C(-28198), INT16_C(-30713), INT16_C( 20992), INT16_C(-15285),
5478 INT16_C( 16558), INT16_C(-12771), INT16_C(-10872), INT16_C(-32584)),
5479 simde_mm_set_epi16(INT16_C( -2396), INT16_C(-16729), INT16_C( 31162), INT16_C(-10205),
5480 INT16_C( 24928), INT16_C( 5223), INT16_C( 7262), INT16_C( 25352)),
5481 simde_mm_set_epi16(INT16_C( 1030), INT16_C( 7839), INT16_C( 9981), INT16_C( 2380),
5482 INT16_C( 6298), INT16_C( -1018), INT16_C( -1205), INT16_C(-12605)) },
5483 { simde_mm_set_epi16(INT16_C(-29475), INT16_C( -4667), INT16_C( 18782), INT16_C(-15431),
5484 INT16_C(-27740), INT16_C( 28051), INT16_C( 4978), INT16_C( 1222)),
5485 simde_mm_set_epi16(INT16_C(-10541), INT16_C(-14468), INT16_C( 18685), INT16_C( 12375),
5486 INT16_C( -5884), INT16_C(-11112), INT16_C( 23337), INT16_C( 12576)),
5487 simde_mm_set_epi16(INT16_C( 4740), INT16_C( 1030), INT16_C( 5354), INT16_C( -2914),
5488 INT16_C( 2490), INT16_C( -4757), INT16_C( 1772), INT16_C( 234)) },
5489 { simde_mm_set_epi16(INT16_C( 27783), INT16_C( 6960), INT16_C( 17513), INT16_C( -7755),
5490 INT16_C( 14695), INT16_C( 12404), INT16_C( -4129), INT16_C(-25366)),
5491 simde_mm_set_epi16(INT16_C( 29475), INT16_C( 25763), INT16_C( 29366), INT16_C( 12820),
5492 INT16_C( -5355), INT16_C( 7751), INT16_C(-24426), INT16_C( -6617)),
5493 simde_mm_set_epi16(INT16_C( 12495), INT16_C( 2736), INT16_C( 7847), INT16_C( -1518),
5494 INT16_C( -1201), INT16_C( 1467), INT16_C( 1538), INT16_C( 2561)) },
5495 { simde_mm_set_epi16(INT16_C( 8852), INT16_C( 11654), INT16_C( 12030), INT16_C( 21843),
5496 INT16_C( 27012), INT16_C( 24122), INT16_C( -4121), INT16_C( 19864)),
5497 simde_mm_set_epi16(INT16_C(-24799), INT16_C(-30738), INT16_C( 19688), INT16_C(-21919),
5498 INT16_C( 23874), INT16_C( -4632), INT16_C(-21648), INT16_C(-28317)),
5499 simde_mm_set_epi16(INT16_C( -3350), INT16_C( -5467), INT16_C( 3613), INT16_C( -7306),
5500 INT16_C( 9840), INT16_C( -1705), INT16_C( 1361), INT16_C( -8583)) },
5501 { simde_mm_set_epi16(INT16_C( 2959), INT16_C(-18532), INT16_C( 4909), INT16_C( 17932),
5502 INT16_C( 9150), INT16_C( 13660), INT16_C(-28547), INT16_C( 5006)),
5503 simde_mm_set_epi16(INT16_C( 16706), INT16_C(-30015), INT16_C(-32638), INT16_C( 13608),
5504 INT16_C( -7846), INT16_C( 14914), INT16_C(-15409), INT16_C(-27711)),
5505 simde_mm_set_epi16(INT16_C( 754), INT16_C( 8487), INT16_C( -2445), INT16_C( 3723),
5506 INT16_C( -1096), INT16_C( 3108), INT16_C( 6712), INT16_C( -2117)) },
5507 { simde_mm_set_epi16(INT16_C( 23854), INT16_C(-13644), INT16_C(-14015), INT16_C(-13375),
5508 INT16_C(-26086), INT16_C( -6430), INT16_C( -5411), INT16_C( 7716)),
5509 simde_mm_set_epi16(INT16_C( -3281), INT16_C(-16733), INT16_C(-20310), INT16_C( 760),
5510 INT16_C(-18586), INT16_C( 1673), INT16_C(-25298), INT16_C(-31758)),
5511 simde_mm_set_epi16(INT16_C( -1195), INT16_C( 3483), INT16_C( 4343), INT16_C( -156),
5512 INT16_C( 7397), INT16_C( -165), INT16_C( 2088), INT16_C( -3740)) },
5513 { simde_mm_set_epi16(INT16_C( 5449), INT16_C( 38), INT16_C( 6018), INT16_C( 10627),
5514 INT16_C( 20505), INT16_C( 28284), INT16_C( 4633), INT16_C(-26325)),
5515 simde_mm_set_epi16(INT16_C( 24784), INT16_C( 11314), INT16_C( 7455), INT16_C( 17813),
5516 INT16_C( -6570), INT16_C(-17283), INT16_C( 30512), INT16_C( 2646)),
5517 simde_mm_set_epi16(INT16_C( 2060), INT16_C( 6), INT16_C( 684), INT16_C( 2888),
5518 INT16_C( -2056), INT16_C( -7459), INT16_C( 2157), INT16_C( -1063)) },
5519 { simde_mm_set_epi16(INT16_C(-21624), INT16_C( 5121), INT16_C( 20041), INT16_C( 13722),
5520 INT16_C(-24360), INT16_C(-19124), INT16_C(-16069), INT16_C( 19357)),
5521 simde_mm_set_epi16(INT16_C( -7842), INT16_C( 31372), INT16_C(-32681), INT16_C( 23520),
5522 INT16_C( -3879), INT16_C( -7485), INT16_C( 22256), INT16_C( 12396)),
5523 simde_mm_set_epi16(INT16_C( 2587), INT16_C( 2451), INT16_C( -9994), INT16_C( 4924),
5524 INT16_C( 1441), INT16_C( 2184), INT16_C( -5458), INT16_C( 3661)) }
5525 };
5526
5527 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5528 simde__m128i r = simde_mm_mulhi_epi16(test_vec[i].a, test_vec[i].b);
5529 simde_assert_m128i_i16(r, ==, test_vec[i].r);
5530 }
5531
5532 return 0;
5533 }
5534
5535 static int
5536 test_simde_mm_mulhi_epu16(SIMDE_MUNIT_TEST_ARGS) {
5537 const struct {
5538 simde__m128i a;
5539 simde__m128i b;
5540 simde__m128i r;
5541 } test_vec[8] = {
5542 { simde_x_mm_set_epu16(35566, 15689, 63042, 57362, 59041, 31224, 19546, 12829),
5543 simde_x_mm_set_epu16(51447, 14621, 39095, 25022, 7138, 40387, 23161, 61024),
5544 simde_x_mm_set_epu16(27919, 3500, 37607, 21901, 6430, 19241, 6907, 11945) },
5545 { simde_x_mm_set_epu16(38922, 8893, 7997, 20067, 60307, 12929, 44791, 36818),
5546 simde_x_mm_set_epu16(56115, 46352, 39645, 27986, 64864, 64084, 5079, 17389),
5547 simde_x_mm_set_epu16(33326, 6289, 4837, 8569, 59688, 12642, 3471, 9769) },
5548 { simde_x_mm_set_epu16(15336, 63669, 63771, 21657, 12681, 61746, 3959, 20213),
5549 simde_x_mm_set_epu16(61649, 7462, 20857, 18418, 43120, 17135, 41045, 26167),
5550 simde_x_mm_set_epu16(14426, 7249, 20295, 6086, 8343, 16144, 2479, 8070) },
5551 { simde_x_mm_set_epu16(18737, 50787, 58977, 18610, 8077, 2942, 26014, 51355),
5552 simde_x_mm_set_epu16( 1776, 1953, 55756, 22299, 19400, 25284, 34496, 57058),
5553 simde_x_mm_set_epu16( 507, 1513, 50175, 6332, 2390, 1135, 13692, 44711) },
5554 { simde_x_mm_set_epu16(10154, 39850, 18306, 55081, 15606, 51707, 30878, 20967),
5555 simde_x_mm_set_epu16(43083, 50945, 49120, 63736, 15921, 64165, 33035, 50764),
5556 simde_x_mm_set_epu16( 6675, 30977, 13720, 53568, 3791, 50625, 15564, 16240) },
5557 { simde_x_mm_set_epu16(12757, 5042, 57712, 50374, 33497, 44643, 9249, 27444),
5558 simde_x_mm_set_epu16( 5516, 28001, 37996, 50447, 2209, 25118, 63921, 7578),
5559 simde_x_mm_set_epu16( 1073, 2154, 33459, 38775, 1129, 17110, 9021, 3173) },
5560 { simde_x_mm_set_epu16( 6520, 35794, 15094, 63136, 22779, 57672, 2423, 7676),
5561 simde_x_mm_set_epu16(20640, 11808, 58236, 53501, 38005, 59820, 7041, 59845),
5562 simde_x_mm_set_epu16( 2053, 6449, 13412, 51541, 13209, 52641, 260, 7009) },
5563 { simde_x_mm_set_epu16(60138, 6017, 21659, 30716, 29807, 17606, 41408, 64807),
5564 simde_x_mm_set_epu16(25712, 8473, 49119, 61515, 61789, 54600, 37356, 34280),
5565 simde_x_mm_set_epu16(23594, 777, 16233, 28831, 28102, 14668, 23602, 33898) }
5566 };
5567
5568 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5569 simde__m128i r = simde_mm_mulhi_epu16(test_vec[i].a, test_vec[i].b);
5570 simde_assert_m128i_u16(r, ==, test_vec[i].r);
5571 }
5572
5573 return 0;
5574 }
5575
5576 static int
5577 test_simde_mm_mullo_epi16(SIMDE_MUNIT_TEST_ARGS) {
5578 const struct {
5579 simde__m128i a;
5580 simde__m128i b;
5581 simde__m128i r;
5582 } test_vec[8] = {
5583 { simde_mm_set_epi16(INT16_C( -7862), INT16_C( 26852), INT16_C( 10752), INT16_C( -9273),
5584 INT16_C( -9160), INT16_C( -7080), INT16_C(-16165), INT16_C( -8327)),
5585 simde_mm_set_epi16(INT16_C(-20410), INT16_C( 24193), INT16_C(-22278), INT16_C( -948),
5586 INT16_C(-31925), INT16_C( -8469), INT16_C( 5801), INT16_C( 10383)),
5587 simde_mm_set_epi16(INT16_C( 31292), INT16_C(-27932), INT16_C( 1024), INT16_C( 8980),
5588 INT16_C( 11368), INT16_C( -4920), INT16_C( 8851), INT16_C(-17257)) },
5589 { simde_mm_set_epi16(INT16_C( 10435), INT16_C( 19268), INT16_C( 27420), INT16_C( 9542),
5590 INT16_C(-22355), INT16_C( 22255), INT16_C(-32016), INT16_C( 23304)),
5591 simde_mm_set_epi16(INT16_C( -3883), INT16_C( 14714), INT16_C(-16367), INT16_C( 4175),
5592 INT16_C( 13386), INT16_C( 20048), INT16_C(-30329), INT16_C(-26826)),
5593 simde_mm_set_epi16(INT16_C(-17857), INT16_C( 616), INT16_C( 7388), INT16_C( -8038),
5594 INT16_C( -6654), INT16_C( -848), INT16_C( 31888), INT16_C( -5200)) },
5595 { simde_mm_set_epi16(INT16_C( 16747), INT16_C(-31494), INT16_C( -6008), INT16_C( 256),
5596 INT16_C( 13584), INT16_C( -2628), INT16_C( 32210), INT16_C(-21204)),
5597 simde_mm_set_epi16(INT16_C( 5844), INT16_C(-28058), INT16_C( -1961), INT16_C( -4057),
5598 INT16_C(-28767), INT16_C(-15421), INT16_C(-28399), INT16_C( 6019)),
5599 simde_mm_set_epi16(INT16_C( 24220), INT16_C(-28772), INT16_C(-14792), INT16_C( 9984),
5600 INT16_C( 20240), INT16_C( 25140), INT16_C( 19698), INT16_C(-28284)) },
5601 { simde_mm_set_epi16(INT16_C( -6420), INT16_C( -8597), INT16_C( -3796), INT16_C( 23244),
5602 INT16_C(-31410), INT16_C( -804), INT16_C( 31623), INT16_C( -736)),
5603 simde_mm_set_epi16(INT16_C( -5973), INT16_C( -2870), INT16_C( -5873), INT16_C( -1641),
5604 INT16_C( -1760), INT16_C( 10653), INT16_C(-28567), INT16_C( 14335)),
5605 simde_mm_set_epi16(INT16_C( 8100), INT16_C( 31854), INT16_C( 11668), INT16_C( -1452),
5606 INT16_C(-30784), INT16_C( 20204), INT16_C(-26017), INT16_C( 736)) },
5607 { simde_mm_set_epi16(INT16_C(-30942), INT16_C( 23208), INT16_C( -332), INT16_C(-26357),
5608 INT16_C( -4575), INT16_C( 25713), INT16_C(-11436), INT16_C(-20469)),
5609 simde_mm_set_epi16(INT16_C( 10752), INT16_C( -6855), INT16_C(-32031), INT16_C( 11523),
5610 INT16_C( 341), INT16_C( 13013), INT16_C( 12462), INT16_C(-19043)),
5611 simde_mm_set_epi16(INT16_C(-27648), INT16_C( 30568), INT16_C( 17460), INT16_C(-17887),
5612 INT16_C( 12789), INT16_C(-23547), INT16_C( 25368), INT16_C(-16961)) },
5613 { simde_mm_set_epi16(INT16_C( -9419), INT16_C(-28719), INT16_C( 16604), INT16_C( 20761),
5614 INT16_C( 7656), INT16_C( 31821), INT16_C( 14202), INT16_C(-12774)),
5615 simde_mm_set_epi16(INT16_C(-24440), INT16_C( -4751), INT16_C(-13213), INT16_C( 10351),
5616 INT16_C( 25105), INT16_C( -3784), INT16_C( 2889), INT16_C( 15532)),
5617 simde_mm_set_epi16(INT16_C(-27608), INT16_C( -1983), INT16_C( 25876), INT16_C( 4567),
5618 INT16_C(-13208), INT16_C(-21032), INT16_C( 4042), INT16_C(-28296)) },
5619 { simde_mm_set_epi16(INT16_C( 24274), INT16_C( 31467), INT16_C( 17654), INT16_C(-30184),
5620 INT16_C( -7163), INT16_C( 32482), INT16_C( 19535), INT16_C(-21227)),
5621 simde_mm_set_epi16(INT16_C( 18405), INT16_C(-30234), INT16_C( 7564), INT16_C(-18060),
5622 INT16_C( 16638), INT16_C(-17950), INT16_C( -411), INT16_C(-23904)),
5623 simde_mm_set_epi16(INT16_C( 4058), INT16_C( 12834), INT16_C(-27512), INT16_C( -5408),
5624 INT16_C( 31990), INT16_C( 21892), INT16_C( 32043), INT16_C( 30496)) },
5625 { simde_mm_set_epi16(INT16_C(-10768), INT16_C(-21062), INT16_C( 22181), INT16_C( 31606),
5626 INT16_C( 16135), INT16_C(-14823), INT16_C(-19116), INT16_C(-13035)),
5627 simde_mm_set_epi16(INT16_C( 25288), INT16_C(-13107), INT16_C(-24173), INT16_C(-10010),
5628 INT16_C(-10251), INT16_C( 9523), INT16_C( 29977), INT16_C(-13646)),
5629 simde_mm_set_epi16(INT16_C( 896), INT16_C( 22002), INT16_C(-31297), INT16_C( 31748),
5630 INT16_C( 12979), INT16_C( 5115), INT16_C( 6452), INT16_C( 10906)) }
5631 };
5632
5633 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5634 simde__m128i r = simde_mm_mullo_epi16(test_vec[i].a, test_vec[i].b);
5635 simde_assert_m128i_i16(r, ==, test_vec[i].r);
5636 }
5637
5638 return 0;
5639 }
5640
5641 static int
5642 test_simde_mm_or_pd(SIMDE_MUNIT_TEST_ARGS) {
5643 const struct {
5644 simde__m128d a;
5645 simde__m128d b;
5646 simde__m128d r;
5647 } test_vec[8] = {
5648 { simde_mm_set_pd(SIMDE_FLOAT64_C( 724.92), SIMDE_FLOAT64_C( 616.22)),
5649 simde_mm_set_pd(SIMDE_FLOAT64_C( 797.85), SIMDE_FLOAT64_C( 484.18)),
5650 simde_mm_set_pd(SIMDE_FLOAT64_C( 989.98), SIMDE_FLOAT64_C( 128062.24)) },
5651 { simde_mm_set_pd(SIMDE_FLOAT64_C( 482.51), SIMDE_FLOAT64_C( 841.87)),
5652 simde_mm_set_pd(SIMDE_FLOAT64_C( -558.83), SIMDE_FLOAT64_C( 997.07)),
5653 simde_mm_set_pd(SIMDE_FLOAT64_C(-129002.75), SIMDE_FLOAT64_C( 1005.87)) },
5654 { simde_mm_set_pd(SIMDE_FLOAT64_C( 741.60), SIMDE_FLOAT64_C( -412.08)),
5655 simde_mm_set_pd(SIMDE_FLOAT64_C( -337.67), SIMDE_FLOAT64_C( -516.98)),
5656 simde_mm_set_pd(SIMDE_FLOAT64_C( -95215.80), SIMDE_FLOAT64_C(-106109.48)) },
5657 { simde_mm_set_pd(SIMDE_FLOAT64_C( 724.85), SIMDE_FLOAT64_C( -403.04)),
5658 simde_mm_set_pd(SIMDE_FLOAT64_C( -503.03), SIMDE_FLOAT64_C( -699.51)),
5659 simde_mm_set_pd(SIMDE_FLOAT64_C(-130927.93), SIMDE_FLOAT64_C(-122827.50)) },
5660 { simde_mm_set_pd(SIMDE_FLOAT64_C( 231.42), SIMDE_FLOAT64_C( 688.03)),
5661 simde_mm_set_pd(SIMDE_FLOAT64_C( -373.50), SIMDE_FLOAT64_C( 983.44)),
5662 simde_mm_set_pd(SIMDE_FLOAT64_C( -511.84), SIMDE_FLOAT64_C( 1015.47)) },
5663 { simde_mm_set_pd(SIMDE_FLOAT64_C( 625.94), SIMDE_FLOAT64_C( -703.47)),
5664 simde_mm_set_pd(SIMDE_FLOAT64_C( -942.06), SIMDE_FLOAT64_C( 249.38)),
5665 simde_mm_set_pd(SIMDE_FLOAT64_C( -1024.00), SIMDE_FLOAT64_C( -65535.34)) },
5666 { simde_mm_set_pd(SIMDE_FLOAT64_C( -97.92), SIMDE_FLOAT64_C( -70.84)),
5667 simde_mm_set_pd(SIMDE_FLOAT64_C( -510.77), SIMDE_FLOAT64_C( -381.02)),
5668 simde_mm_set_pd(SIMDE_FLOAT64_C( -511.93), SIMDE_FLOAT64_C( -383.36)) },
5669 { simde_mm_set_pd(SIMDE_FLOAT64_C( -350.87), SIMDE_FLOAT64_C( -439.10)),
5670 simde_mm_set_pd(SIMDE_FLOAT64_C( 66.40), SIMDE_FLOAT64_C( 195.88)),
5671 simde_mm_set_pd(SIMDE_FLOAT64_C( -351.87), SIMDE_FLOAT64_C( -439.86)) }
5672 };
5673
5674 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5675 simde__m128d r = simde_mm_or_pd(test_vec[i].a, test_vec[i].b);
5676 simde_assert_m128d_close(r, test_vec[i].r, 1);
5677 }
5678
5679 return 0;
5680 }
5681
5682 static int
5683 test_simde_mm_or_si128(SIMDE_MUNIT_TEST_ARGS) {
5684 const struct {
5685 simde__m128i a;
5686 simde__m128i b;
5687 simde__m128i r;
5688 } test_vec[8] = {
5689 { simde_mm_set_epi64x(INT64_C( 3806780817851842454), INT64_C( 3002076500639794819)),
5690 simde_mm_set_epi64x(INT64_C( -1576369425501019200), INT64_C( 5863973371898850910)),
5691 simde_mm_set_epi64x(INT64_C( -81065909581643818), INT64_C( 8784698508288454367)) },
5692 { simde_mm_set_epi64x(INT64_C( 4358272343769327172), INT64_C( -4254544166297055533)),
5693 simde_mm_set_epi64x(INT64_C( -3870591542062132163), INT64_C( 8365983368440196218)),
5694 simde_mm_set_epi64x(INT64_C( -109223286268234115), INT64_C( -793210092996038917)) },
5695 { simde_mm_set_epi64x(INT64_C( -8935978336450140157), INT64_C( -4292132981830530492)),
5696 simde_mm_set_epi64x(INT64_C( -5275996428160709349), INT64_C( 1809702168782653061)),
5697 simde_mm_set_epi64x(INT64_C( -5188173984729010917), INT64_C( -2486163139644895547)) },
5698 { simde_mm_set_epi64x(INT64_C( -3617483608260678394), INT64_C( -7299761588855953181)),
5699 simde_mm_set_epi64x(INT64_C( -3679366837934484296), INT64_C( 5342128716508209170)),
5700 simde_mm_set_epi64x(INT64_C( -3603900203459740226), INT64_C( -2687789418219853581)) },
5701 { simde_mm_set_epi64x(INT64_C( 8613776548693408177), INT64_C( -1221094295236221778)),
5702 simde_mm_set_epi64x(INT64_C( 8491744443283364215), INT64_C( 4783609441494973751)),
5703 simde_mm_set_epi64x(INT64_C( 8636493096189557239), INT64_C( -1193507273608823361)) },
5704 { simde_mm_set_epi64x(INT64_C( 2256952633337952767), INT64_C( -5574602856706714295)),
5705 simde_mm_set_epi64x(INT64_C( -7711313128986328449), INT64_C( -5631421726257218112)),
5706 simde_mm_set_epi64x(INT64_C( -6918672559143650305), INT64_C( -5477736148453327415)) },
5707 { simde_mm_set_epi64x(INT64_C( 6915809581026069253), INT64_C( -4447049561909832301)),
5708 simde_mm_set_epi64x(INT64_C( 7606865206928880870), INT64_C( 526097040835303983)),
5709 simde_mm_set_epi64x(INT64_C( 9221656517182193639), INT64_C( -4085336622434885697)) },
5710 { simde_mm_set_epi64x(INT64_C( 6255716227368614659), INT64_C( 3842255123517004943)),
5711 simde_mm_set_epi64x(INT64_C( -2544504471973996098), INT64_C( 7324902981920444710)),
5712 simde_mm_set_epi64x(INT64_C( -2382159098826458177), INT64_C( 8500384867471056303)) }
5713 };
5714
5715 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5716 simde__m128i r = simde_mm_or_si128(test_vec[i].a, test_vec[i].b);
5717 simde_assert_m128i_i8(r, ==, test_vec[i].r);
5718 }
5719
5720 return 0;
5721 }
5722
5723 static int
5724 test_simde_mm_packs_epi16(SIMDE_MUNIT_TEST_ARGS) {
5725 const struct {
5726 simde__m128i a;
5727 simde__m128i b;
5728 simde__m128i r;
5729 } test_vec[8] = {
5730 { simde_mm_set_epi16(INT16_C(-22268), INT16_C( -16), INT16_C( -49), INT16_C( 8),
5731 INT16_C( 20029), INT16_C(-30901), INT16_C(-17364), INT16_C( -65)),
5732 simde_mm_set_epi16(INT16_C(-20429), INT16_C( 4131), INT16_C(-19140), INT16_C( 23907),
5733 INT16_C( -87), INT16_C(-15818), INT16_C( -93), INT16_C( -34)),
5734 simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( -87), INT8_C(-128), INT8_C( -93), INT8_C( -34),
5735 INT8_C(-128), INT8_C( -16), INT8_C( -49), INT8_C( 8), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( -65)) },
5736 { simde_mm_set_epi16(INT16_C( -1320), INT16_C( 64), INT16_C( 7903), INT16_C( -86),
5737 INT16_C( 17775), INT16_C( -29), INT16_C(-24347), INT16_C( 20534)),
5738 simde_mm_set_epi16(INT16_C( -26), INT16_C( 32460), INT16_C( -35), INT16_C( 9),
5739 INT16_C( 97), INT16_C(-16116), INT16_C( 21908), INT16_C( 31051)),
5740 simde_mm_set_epi8(INT8_C( -26), INT8_C( 127), INT8_C( -35), INT8_C( 9), INT8_C( 97), INT8_C(-128), INT8_C( 127), INT8_C( 127),
5741 INT8_C(-128), INT8_C( 64), INT8_C( 127), INT8_C( -86), INT8_C( 127), INT8_C( -29), INT8_C(-128), INT8_C( 127)) },
5742 { simde_mm_set_epi16(INT16_C( 16), INT16_C(-23521), INT16_C( 107), INT16_C( 10693),
5743 INT16_C( 37), INT16_C( 32277), INT16_C( -120), INT16_C( -13)),
5744 simde_mm_set_epi16(INT16_C( 7912), INT16_C( 127), INT16_C(-27046), INT16_C( -104),
5745 INT16_C( 114), INT16_C( -54), INT16_C( -26), INT16_C( 29057)),
5746 simde_mm_set_epi8(INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-104), INT8_C( 114), INT8_C( -54), INT8_C( -26), INT8_C( 127),
5747 INT8_C( 16), INT8_C(-128), INT8_C( 107), INT8_C( 127), INT8_C( 37), INT8_C( 127), INT8_C(-120), INT8_C( -13)) },
5748 { simde_mm_set_epi16(INT16_C( 8), INT16_C( -84), INT16_C( 26), INT16_C( -1727),
5749 INT16_C( 53), INT16_C( 29056), INT16_C( -7932), INT16_C( 40)),
5750 simde_mm_set_epi16(INT16_C(-25560), INT16_C( 94), INT16_C( 19164), INT16_C( -119),
5751 INT16_C(-25450), INT16_C( 26043), INT16_C( -9549), INT16_C( 110)),
5752 simde_mm_set_epi8(INT8_C(-128), INT8_C( 94), INT8_C( 127), INT8_C(-119), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 110),
5753 INT8_C( 8), INT8_C( -84), INT8_C( 26), INT8_C(-128), INT8_C( 53), INT8_C( 127), INT8_C(-128), INT8_C( 40)) },
5754 { simde_mm_set_epi16(INT16_C( 17087), INT16_C( 3), INT16_C( 26871), INT16_C( 126),
5755 INT16_C(-10072), INT16_C( 95), INT16_C( 117), INT16_C( 110)),
5756 simde_mm_set_epi16(INT16_C( 7667), INT16_C( -3918), INT16_C( -98), INT16_C( -77),
5757 INT16_C( 29383), INT16_C(-21060), INT16_C(-18775), INT16_C( 21121)),
5758 simde_mm_set_epi8(INT8_C( 127), INT8_C(-128), INT8_C( -98), INT8_C( -77), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 127),
5759 INT8_C( 127), INT8_C( 3), INT8_C( 127), INT8_C( 126), INT8_C(-128), INT8_C( 95), INT8_C( 117), INT8_C( 110)) },
5760 { simde_mm_set_epi16(INT16_C( -120), INT16_C(-29564), INT16_C( -120), INT16_C( -79),
5761 INT16_C( -93), INT16_C(-23649), INT16_C( 25423), INT16_C(-23661)),
5762 simde_mm_set_epi16(INT16_C( 109), INT16_C(-30808), INT16_C( 45), INT16_C( -18),
5763 INT16_C( -4268), INT16_C( 30580), INT16_C( 77), INT16_C( -1896)),
5764 simde_mm_set_epi8(INT8_C( 109), INT8_C(-128), INT8_C( 45), INT8_C( -18), INT8_C(-128), INT8_C( 127), INT8_C( 77), INT8_C(-128),
5765 INT8_C(-120), INT8_C(-128), INT8_C(-120), INT8_C( -79), INT8_C( -93), INT8_C(-128), INT8_C( 127), INT8_C(-128)) },
5766 { simde_mm_set_epi16(INT16_C( 75), INT16_C( -80), INT16_C( -5), INT16_C( -23),
5767 INT16_C( -9879), INT16_C( 116), INT16_C(-20199), INT16_C( 5095)),
5768 simde_mm_set_epi16(INT16_C( -3339), INT16_C( -48), INT16_C( -117), INT16_C( -2107),
5769 INT16_C( 11715), INT16_C(-13793), INT16_C(-31434), INT16_C( 25021)),
5770 simde_mm_set_epi8(INT8_C(-128), INT8_C( -48), INT8_C(-117), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 127),
5771 INT8_C( 75), INT8_C( -80), INT8_C( -5), INT8_C( -23), INT8_C(-128), INT8_C( 116), INT8_C(-128), INT8_C( 127)) },
5772 { simde_mm_set_epi16(INT16_C( -15), INT16_C( 110), INT16_C( -521), INT16_C( 75),
5773 INT16_C( 12019), INT16_C(-30116), INT16_C( 17702), INT16_C( 14401)),
5774 simde_mm_set_epi16(INT16_C(-15008), INT16_C( -80), INT16_C( -127), INT16_C(-29333),
5775 INT16_C( -7), INT16_C(-17846), INT16_C( 83), INT16_C( 25637)),
5776 simde_mm_set_epi8(INT8_C(-128), INT8_C( -80), INT8_C(-127), INT8_C(-128), INT8_C( -7), INT8_C(-128), INT8_C( 83), INT8_C( 127),
5777 INT8_C( -15), INT8_C( 110), INT8_C(-128), INT8_C( 75), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127)) }
5778 };
5779
5780 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5781 simde__m128i r = simde_mm_packs_epi16(test_vec[i].a, test_vec[i].b);
5782 simde_assert_m128i_i8(r, ==, test_vec[i].r);
5783 }
5784
5785 return 0;
5786 }
5787
5788 static int
5789 test_simde_mm_packs_epi32(SIMDE_MUNIT_TEST_ARGS) {
5790 const struct {
5791 simde__m128i a;
5792 simde__m128i b;
5793 simde__m128i r;
5794 } test_vec[8] = {
5795 { simde_mm_set_epi32(INT32_C( 1221393622), INT32_C( 1245122), INT32_C( -546439182), INT32_C( 1653967185)),
5796 simde_mm_set_epi32(INT32_C( -5570627), INT32_C( 1604714526), INT32_C( 3276815), INT32_C( -865960168)),
5797 simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768),
5798 INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) },
5799 { simde_mm_set_epi32(INT32_C(-1556742099), INT32_C( 3735602), INT32_C( -795913538), INT32_C( 5177351)),
5800 simde_mm_set_epi32(INT32_C( 230555532), INT32_C( -681902099), INT32_C(-1460947394), INT32_C( 1435959285)),
5801 simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767),
5802 INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) },
5803 { simde_mm_set_epi32(INT32_C( 1058013130), INT32_C( 1801350196), INT32_C( 3735625), INT32_C( 393200)),
5804 simde_mm_set_epi32(INT32_C( -5046245), INT32_C( 1947557327), INT32_C( -390520293), INT32_C(-1060577736)),
5805 simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768),
5806 INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767)) },
5807 { simde_mm_set_epi32(INT32_C( 1625994666), INT32_C( 151157112), INT32_C( -6356918), INT32_C( 574958135)),
5808 simde_mm_set_epi32(INT32_C( -878149423), INT32_C( -1310820), INT32_C( 7694016), INT32_C( 1656093)),
5809 simde_mm_set_epi16(INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767),
5810 INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) },
5811 { simde_mm_set_epi32(INT32_C( 906756004), INT32_C( 589883340), INT32_C(-1375993871), INT32_C( -5221415)),
5812 simde_mm_set_epi32(INT32_C(-1492628097), INT32_C( -7536518), INT32_C( 1834989), INT32_C(-2090880115)),
5813 simde_mm_set_epi16(INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768),
5814 INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768)) },
5815 { simde_mm_set_epi32(INT32_C( 1759910713), INT32_C( 2028743221), INT32_C( 1203039561), INT32_C( 3735524)),
5816 simde_mm_set_epi32(INT32_C( 5505016), INT32_C( 68681650), INT32_C( 3895727), INT32_C(-1084227687)),
5817 simde_mm_set_epi16(INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768),
5818 INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767)) },
5819 { simde_mm_set_epi32(INT32_C( 1249181759), INT32_C( 850460644), INT32_C( 643956807), INT32_C( 1402185830)),
5820 simde_mm_set_epi32(INT32_C( 503821785), INT32_C( -1966044), INT32_C( -1228291), INT32_C( 6420027)),
5821 simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767),
5822 INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767)) },
5823 { simde_mm_set_epi32(INT32_C( 1562990695), INT32_C( 134021098), INT32_C(-1071906850), INT32_C( -558152330)),
5824 simde_mm_set_epi32(INT32_C(-1746927677), INT32_C( 7209004), INT32_C( 917512), INT32_C( 4155429)),
5825 simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767),
5826 INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768)) }
5827 };
5828
5829 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5830 simde__m128i r = simde_mm_packs_epi32(test_vec[i].a, test_vec[i].b);
5831 simde_assert_m128i_i16(r, ==, test_vec[i].r);
5832 }
5833
5834 return 0;
5835 }
5836
5837 static int
5838 test_simde_mm_packus_epi16(SIMDE_MUNIT_TEST_ARGS) {
5839 const struct {
5840 simde__m128i a;
5841 simde__m128i b;
5842 simde__m128i r;
5843 } test_vec[8] = {
5844 { simde_mm_set_epi16(INT16_C( 217), INT16_C(-10893), INT16_C( 10007), INT16_C(-11974),
5845 INT16_C( 134), INT16_C( 45), INT16_C( 21), INT16_C( 179)),
5846 simde_mm_set_epi16(INT16_C( 14829), INT16_C( 37), INT16_C( 2757), INT16_C(-26385),
5847 INT16_C( 26), INT16_C( 196), INT16_C( 2768), INT16_C( 221)),
5848 simde_x_mm_set_epu8(255, 37, 255, 0, 26, 196, 255, 221,
5849 217, 0, 255, 0, 134, 45, 21, 179) },
5850 { simde_mm_set_epi16(INT16_C( 84), INT16_C( 11197), INT16_C( 28), INT16_C(-18960),
5851 INT16_C( 0), INT16_C( 243), INT16_C( 209), INT16_C( 115)),
5852 simde_mm_set_epi16(INT16_C( 26800), INT16_C( 44), INT16_C( 244), INT16_C( 114),
5853 INT16_C( 234), INT16_C( 7269), INT16_C( 2441), INT16_C( -9419)),
5854 simde_x_mm_set_epu8(255, 44, 244, 114, 234, 255, 255, 0,
5855 84, 255, 28, 0, 0, 243, 209, 115) },
5856 { simde_mm_set_epi16(INT16_C( 26559), INT16_C(-13811), INT16_C( 141), INT16_C( 130),
5857 INT16_C(-24149), INT16_C( 185), INT16_C( 9120), INT16_C(-14604)),
5858 simde_mm_set_epi16(INT16_C( 190), INT16_C( 162), INT16_C( 3761), INT16_C(-10696),
5859 INT16_C( 15175), INT16_C( 6926), INT16_C( 19649), INT16_C( 79)),
5860 simde_x_mm_set_epu8(190, 162, 255, 0, 255, 255, 255, 79,
5861 255, 0, 141, 130, 0, 185, 255, 0) },
5862 { simde_mm_set_epi16(INT16_C( 250), INT16_C(-23643), INT16_C(-15994), INT16_C( 173),
5863 INT16_C( 97), INT16_C( 158), INT16_C( 82), INT16_C( 231)),
5864 simde_mm_set_epi16(INT16_C( 70), INT16_C( 30022), INT16_C( 0), INT16_C( -8717),
5865 INT16_C( 6), INT16_C( 206), INT16_C(-25401), INT16_C( 252)),
5866 simde_x_mm_set_epu8( 70, 255, 0, 0, 6, 206, 0, 252,
5867 250, 0, 0, 173, 97, 158, 82, 231) },
5868 { simde_mm_set_epi16(INT16_C( 92), INT16_C(-13839), INT16_C( 243), INT16_C( -3624),
5869 INT16_C( 252), INT16_C(-29405), INT16_C( 3), INT16_C( 6730)),
5870 simde_mm_set_epi16(INT16_C( 4496), INT16_C( 19200), INT16_C( 70), INT16_C( 128),
5871 INT16_C( 2496), INT16_C( 60), INT16_C( 18531), INT16_C(-20006)),
5872 simde_x_mm_set_epu8(255, 255, 70, 128, 255, 60, 255, 0,
5873 92, 0, 243, 0, 252, 0, 3, 255) },
5874 { simde_mm_set_epi16(INT16_C( 57), INT16_C(-14586), INT16_C( 21134), INT16_C( 7065),
5875 INT16_C( 3), INT16_C(-16049), INT16_C( 26223), INT16_C(-20721)),
5876 simde_mm_set_epi16(INT16_C( 129), INT16_C( 105), INT16_C( -1899), INT16_C( 221),
5877 INT16_C(-24446), INT16_C(-20297), INT16_C( 30906), INT16_C( 192)),
5878 simde_x_mm_set_epu8(129, 105, 0, 221, 0, 0, 255, 192,
5879 57, 0, 255, 255, 3, 0, 255, 0) },
5880 { simde_mm_set_epi16(INT16_C( 128), INT16_C( 22639), INT16_C( -9670), INT16_C( 8168),
5881 INT16_C( -1055), INT16_C(-24505), INT16_C( 32719), INT16_C( 16999)),
5882 simde_mm_set_epi16(INT16_C(-23185), INT16_C( 119), INT16_C( 108), INT16_C( 34),
5883 INT16_C(-15892), INT16_C( 2641), INT16_C( 242), INT16_C( -7325)),
5884 simde_x_mm_set_epu8( 0, 119, 108, 34, 0, 255, 242, 0,
5885 128, 255, 0, 255, 0, 0, 255, 255) },
5886 { simde_mm_set_epi16(INT16_C( 95), INT16_C( 145), INT16_C( 101), INT16_C( 5449),
5887 INT16_C( 163), INT16_C( 19185), INT16_C( 3025), INT16_C( 52)),
5888 simde_mm_set_epi16(INT16_C( 2870), INT16_C( 140), INT16_C( 144), INT16_C( 254),
5889 INT16_C( 8482), INT16_C( 4388), INT16_C( 201), INT16_C(-14867)),
5890 simde_x_mm_set_epu8(255, 140, 144, 254, 255, 255, 201, 0,
5891 95, 145, 101, 255, 163, 255, 255, 52) }
5892 };
5893
5894 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5895 simde__m128i r = simde_mm_packus_epi16(test_vec[i].a, test_vec[i].b);
5896 simde_assert_m128i_i8(r, ==, test_vec[i].r);
5897 }
5898
5899 return 0;
5900 }
5901
5902 static int
5903 test_simde_mm_sad_epu8(SIMDE_MUNIT_TEST_ARGS) {
5904 const struct {
5905 simde__m128i a;
5906 simde__m128i b;
5907 simde__m128i r;
5908 } test_vec[8] = {
5909 { simde_x_mm_set_epu8(215, 90, 59, 114, 199, 190, 5, 88,
5910 189, 152, 95, 90, 71, 40, 85, 39),
5911 simde_x_mm_set_epu8( 54, 166, 154, 195, 131, 97, 225, 141,
5912 107, 5, 50, 55, 194, 31, 223, 92),
5913 simde_mm_set_epi64x(INT64_C( 847), INT64_C( 632)) },
5914 { simde_x_mm_set_epu8( 73, 8, 35, 63, 9, 118, 137, 154,
5915 163, 61, 8, 4, 96, 39, 181, 31),
5916 simde_x_mm_set_epu8(244, 64, 21, 0, 73, 79, 47, 148,
5917 227, 0, 217, 151, 241, 123, 179, 200),
5918 simde_mm_set_epi64x(INT64_C( 503), INT64_C( 881)) },
5919 { simde_x_mm_set_epu8(188, 156, 164, 209, 37, 165, 186, 237,
5920 157, 45, 141, 9, 227, 9, 6, 113),
5921 simde_x_mm_set_epu8(196, 12, 188, 136, 227, 14, 111, 188,
5922 42, 252, 141, 251, 41, 42, 48, 10),
5923 simde_mm_set_epi64x(INT64_C( 714), INT64_C( 928)) },
5924 { simde_x_mm_set_epu8(221, 210, 203, 74, 151, 53, 237, 96,
5925 105, 62, 32, 146, 208, 27, 214, 15),
5926 simde_x_mm_set_epu8(106, 143, 238, 35, 165, 158, 48, 47,
5927 51, 172, 84, 44, 119, 233, 73, 10),
5928 simde_mm_set_epi64x(INT64_C( 613), INT64_C( 759)) },
5929 { simde_x_mm_set_epu8(158, 146, 218, 39, 84, 176, 15, 200,
5930 114, 100, 110, 72, 37, 118, 124, 52),
5931 simde_x_mm_set_epu8(250, 173, 237, 165, 77, 193, 83, 68,
5932 159, 214, 52, 182, 160, 117, 236, 237),
5933 simde_mm_set_epi64x(INT64_C( 488), INT64_C( 748)) },
5934 { simde_x_mm_set_epu8(116, 17, 11, 212, 41, 247, 182, 55,
5935 218, 151, 38, 248, 87, 3, 108, 3),
5936 simde_x_mm_set_epu8(178, 255, 4, 183, 81, 104, 79, 156,
5937 178, 174, 55, 110, 255, 70, 179, 129),
5938 simde_mm_set_epi64x(INT64_C( 723), INT64_C( 650)) },
5939 { simde_x_mm_set_epu8( 26, 112, 229, 82, 174, 243, 79, 54,
5940 103, 25, 150, 156, 120, 47, 29, 212),
5941 simde_x_mm_set_epu8(155, 158, 100, 233, 190, 145, 4, 176,
5942 236, 88, 45, 24, 159, 182, 83, 215),
5943 simde_mm_set_epi64x(INT64_C( 766), INT64_C( 664)) },
5944 { simde_x_mm_set_epu8( 29, 127, 97, 34, 247, 17, 64, 73,
5945 255, 69, 189, 150, 155, 84, 174, 88),
5946 simde_x_mm_set_epu8(224, 212, 67, 184, 190, 48, 118, 149,
5947 209, 255, 9, 200, 126, 242, 201, 30),
5948 simde_mm_set_epi64x(INT64_C( 678), INT64_C( 734)) }
5949 };
5950
5951 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5952 simde__m128i r = simde_mm_sad_epu8(test_vec[i].a, test_vec[i].b);
5953 simde_assert_m128i_u8(r, ==, test_vec[i].r);
5954 }
5955
5956 return 0;
5957 }
5958
5959 static int
5960 test_simde_mm_set_epi8(SIMDE_MUNIT_TEST_ARGS) {
5961 const struct {
5962 int8_t e15;
5963 int8_t e14;
5964 int8_t e13;
5965 int8_t e12;
5966 int8_t e11;
5967 int8_t e10;
5968 int8_t e9;
5969 int8_t e8;
5970 int8_t e7;
5971 int8_t e6;
5972 int8_t e5;
5973 int8_t e4;
5974 int8_t e3;
5975 int8_t e2;
5976 int8_t e1;
5977 int8_t e0;
5978 simde__m128i r;
5979 } test_vec[8] = {
5980 { -48, -30, 88, -96, 9, 89, 20, -95,
5981 63, -76, 126, 67, 85, 88, -17, -107,
5982 simde_mm_set_epi8(INT8_C( -48), INT8_C( -30), INT8_C( 88), INT8_C( -96),
5983 INT8_C( 9), INT8_C( 89), INT8_C( 20), INT8_C( -95),
5984 INT8_C( 63), INT8_C( -76), INT8_C( 126), INT8_C( 67),
5985 INT8_C( 85), INT8_C( 88), INT8_C( -17), INT8_C(-107)) },
5986 { 73, -68, -61, 58, -37, 5, -64, -56,
5987 -5, 33, -53, -34, -11, 57, 49, 12,
5988 simde_mm_set_epi8(INT8_C( 73), INT8_C( -68), INT8_C( -61), INT8_C( 58),
5989 INT8_C( -37), INT8_C( 5), INT8_C( -64), INT8_C( -56),
5990 INT8_C( -5), INT8_C( 33), INT8_C( -53), INT8_C( -34),
5991 INT8_C( -11), INT8_C( 57), INT8_C( 49), INT8_C( 12)) },
5992 { -65, -108, 95, -117, 35, 45, 54, -43,
5993 -45, 123, 113, -6, 23, -66, 77, 94,
5994 simde_mm_set_epi8(INT8_C( -65), INT8_C(-108), INT8_C( 95), INT8_C(-117),
5995 INT8_C( 35), INT8_C( 45), INT8_C( 54), INT8_C( -43),
5996 INT8_C( -45), INT8_C( 123), INT8_C( 113), INT8_C( -6),
5997 INT8_C( 23), INT8_C( -66), INT8_C( 77), INT8_C( 94)) },
5998 { -72, 95, 112, 68, 56, -74, -97, -55,
5999 22, 53, -22, 68, -107, 99, -5, -94,
6000 simde_mm_set_epi8(INT8_C( -72), INT8_C( 95), INT8_C( 112), INT8_C( 68),
6001 INT8_C( 56), INT8_C( -74), INT8_C( -97), INT8_C( -55),
6002 INT8_C( 22), INT8_C( 53), INT8_C( -22), INT8_C( 68),
6003 INT8_C(-107), INT8_C( 99), INT8_C( -5), INT8_C( -94)) },
6004 { -48, 6, 114, 89, -57, -104, -78, -72,
6005 -32, -41, -27, -58, -1, -100, -126, -52,
6006 simde_mm_set_epi8(INT8_C( -48), INT8_C( 6), INT8_C( 114), INT8_C( 89),
6007 INT8_C( -57), INT8_C(-104), INT8_C( -78), INT8_C( -72),
6008 INT8_C( -32), INT8_C( -41), INT8_C( -27), INT8_C( -58),
6009 INT8_C( -1), INT8_C(-100), INT8_C(-126), INT8_C( -52)) },
6010 { 75, -127, -59, 90, 126, -9, 88, 22,
6011 36, 75, -11, -10, 31, -72, 19, -30,
6012 simde_mm_set_epi8(INT8_C( 75), INT8_C(-127), INT8_C( -59), INT8_C( 90),
6013 INT8_C( 126), INT8_C( -9), INT8_C( 88), INT8_C( 22),
6014 INT8_C( 36), INT8_C( 75), INT8_C( -11), INT8_C( -10),
6015 INT8_C( 31), INT8_C( -72), INT8_C( 19), INT8_C( -30)) },
6016 { -66, 57, 86, -24, -102, 97, 37, 79,
6017 98, -52, 75, 113, -66, -45, -97, 50,
6018 simde_mm_set_epi8(INT8_C( -66), INT8_C( 57), INT8_C( 86), INT8_C( -24),
6019 INT8_C(-102), INT8_C( 97), INT8_C( 37), INT8_C( 79),
6020 INT8_C( 98), INT8_C( -52), INT8_C( 75), INT8_C( 113),
6021 INT8_C( -66), INT8_C( -45), INT8_C( -97), INT8_C( 50)) },
6022 { -14, -31, -3, 35, 62, 73, 10, 46,
6023 72, 110, -30, 71, -50, -46, 106, -75,
6024 simde_mm_set_epi8(INT8_C( -14), INT8_C( -31), INT8_C( -3), INT8_C( 35),
6025 INT8_C( 62), INT8_C( 73), INT8_C( 10), INT8_C( 46),
6026 INT8_C( 72), INT8_C( 110), INT8_C( -30), INT8_C( 71),
6027 INT8_C( -50), INT8_C( -46), INT8_C( 106), INT8_C( -75)) },
6028 };
6029
6030 // printf("\n");
6031 // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
6032 // int8_t e15 = munit_rand_uint32();
6033 // int8_t e14 = munit_rand_uint32();
6034 // int8_t e13 = munit_rand_uint32();
6035 // int8_t e12 = munit_rand_uint32();
6036 // int8_t e11 = munit_rand_uint32();
6037 // int8_t e10 = munit_rand_uint32();
6038 // int8_t e9 = munit_rand_uint32();
6039 // int8_t e8 = munit_rand_uint32();
6040 // int8_t e7 = munit_rand_uint32();
6041 // int8_t e6 = munit_rand_uint32();
6042 // int8_t e5 = munit_rand_uint32();
6043 // int8_t e4 = munit_rand_uint32();
6044 // int8_t e3 = munit_rand_uint32();
6045 // int8_t e2 = munit_rand_uint32();
6046 // int8_t e1 = munit_rand_uint32();
6047 // int8_t e0 = munit_rand_uint32();
6048 // simde__m128i_private r;
6049
6050 // r = simde__m128i_to_private(simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0));
6051
6052 // printf(" { %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ",\n"
6053 // " %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ",\n",
6054 // e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0);
6055 // printf(" simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
6056 // " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
6057 // " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
6058 // " INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")) },\n",
6059 // r.i8[15], r.i8[14], r.i8[13], r.i8[12], r.i8[11], r.i8[10], r.i8[ 9], r.i8[ 8],
6060 // r.i8[ 7], r.i8[ 6], r.i8[ 5], r.i8[ 4], r.i8[ 3], r.i8[ 2], r.i8[ 1], r.i8[ 0]);
6061 // }
6062 // return MUNIT_FAIL;
6063
6064 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6065 simde__m128i r = simde_mm_set_epi8(
6066 test_vec[i].e15, test_vec[i].e14, test_vec[i].e13, test_vec[i].e12,
6067 test_vec[i].e11, test_vec[i].e10, test_vec[i].e9, test_vec[i].e8,
6068 test_vec[i].e7, test_vec[i].e6, test_vec[i].e5, test_vec[i].e4,
6069 test_vec[i].e3, test_vec[i].e2, test_vec[i].e1, test_vec[i].e0);
6070 simde_assert_m128i_i16(r, ==, test_vec[i].r);
6071 }
6072
6073 return 0;
6074 }
6075
6076
6077 static int
6078 test_simde_mm_set_epi16(SIMDE_MUNIT_TEST_ARGS) {
6079 const struct {
6080 int16_t e7;
6081 int16_t e6;
6082 int16_t e5;
6083 int16_t e4;
6084 int16_t e3;
6085 int16_t e2;
6086 int16_t e1;
6087 int16_t e0;
6088 simde__m128i r;
6089 } test_vec[8] = {
6090 { -12714, -18436, 19109, 27542, -4031, 11847, 32066, 4849,
6091 simde_mm_set_epi16(INT16_C(-12714), INT16_C(-18436), INT16_C( 19109), INT16_C( 27542),
6092 INT16_C( -4031), INT16_C( 11847), INT16_C( 32066), INT16_C( 4849)) },
6093 { 20812, -18306, 32711, 2248, -22144, -30920, 20888, -23709,
6094 simde_mm_set_epi16(INT16_C( 20812), INT16_C(-18306), INT16_C( 32711), INT16_C( 2248),
6095 INT16_C(-22144), INT16_C(-30920), INT16_C( 20888), INT16_C(-23709)) },
6096 { 8868, -14625, -5258, -12928, -11989, 31315, -9098, 19222,
6097 simde_mm_set_epi16(INT16_C( 8868), INT16_C(-14625), INT16_C( -5258), INT16_C(-12928),
6098 INT16_C(-11989), INT16_C( 31315), INT16_C( -9098), INT16_C( 19222)) },
6099 { -5334, 23871, 3901, 14443, -13328, 23359, -24889, 28356,
6100 simde_mm_set_epi16(INT16_C( -5334), INT16_C( 23871), INT16_C( 3901), INT16_C( 14443),
6101 INT16_C(-13328), INT16_C( 23359), INT16_C(-24889), INT16_C( 28356)) },
6102 { 10774, -19043, 31284, 4044, 862, -11938, -27554, -25119,
6103 simde_mm_set_epi16(INT16_C( 10774), INT16_C(-19043), INT16_C( 31284), INT16_C( 4044),
6104 INT16_C( 862), INT16_C(-11938), INT16_C(-27554), INT16_C(-25119)) },
6105 { 20150, -31510, -29797, -3272, -18019, 16111, -15969, -11740,
6106 simde_mm_set_epi16(INT16_C( 20150), INT16_C(-31510), INT16_C(-29797), INT16_C( -3272),
6107 INT16_C(-18019), INT16_C( 16111), INT16_C(-15969), INT16_C(-11740)) },
6108 { -3147, -24243, -28710, -5510, -20724, 13872, -9632, -7728,
6109 simde_mm_set_epi16(INT16_C( -3147), INT16_C(-24243), INT16_C(-28710), INT16_C( -5510),
6110 INT16_C(-20724), INT16_C( 13872), INT16_C( -9632), INT16_C( -7728)) },
6111 { 6318, 11524, 30789, -2974, 3458, -10908, -25743, -20801,
6112 simde_mm_set_epi16(INT16_C( 6318), INT16_C( 11524), INT16_C( 30789), INT16_C( -2974),
6113 INT16_C( 3458), INT16_C(-10908), INT16_C(-25743), INT16_C(-20801)) },
6114 };
6115
6116 // printf("\n");
6117 // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
6118 // int16_t e7 = munit_rand_uint32();
6119 // int16_t e6 = munit_rand_uint32();
6120 // int16_t e5 = munit_rand_uint32();
6121 // int16_t e4 = munit_rand_uint32();
6122 // int16_t e3 = munit_rand_uint32();
6123 // int16_t e2 = munit_rand_uint32();
6124 // int16_t e1 = munit_rand_uint32();
6125 // int16_t e0 = munit_rand_uint32();
6126 // simde__m128i_private r;
6127
6128 // r = simde__m128i_to_private(simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0));
6129
6130 // printf(" { %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ",\n",
6131 // e7, e6, e5, e4, e3, e2, e1, e0);
6132 // printf(" simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
6133 // " INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")) },\n",
6134 // r.i16[7], r.i16[6], r.i16[5], r.i16[4], r.i16[3], r.i16[2], r.i16[1], r.i16[0]);
6135 // }
6136 // return MUNIT_FAIL;
6137
6138 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6139 simde__m128i r = simde_mm_set_epi16(
6140 test_vec[i].e7, test_vec[i].e6, test_vec[i].e5, test_vec[i].e4,
6141 test_vec[i].e3, test_vec[i].e2, test_vec[i].e1, test_vec[i].e0);
6142 simde_assert_m128i_i16(r, ==, test_vec[i].r);
6143 }
6144
6145 return 0;
6146 }
6147
6148 static int
6149 test_simde_mm_set_epi32(SIMDE_MUNIT_TEST_ARGS) {
6150 const struct {
6151 int32_t e3;
6152 int32_t e2;
6153 int32_t e1;
6154 int32_t e0;
6155 simde__m128i r;
6156 } test_vec[8] = {
6157 { 391721235, 1158362691, 2131167346, -1830589941,
6158 simde_mm_set_epi32(INT32_C( 391721235), INT32_C( 1158362691), INT32_C( 2131167346), INT32_C(-1830589941)) },
6159 { 2141048609, 2073510589, 924258053, 594030571,
6160 simde_mm_set_epi32(INT32_C( 2141048609), INT32_C( 2073510589), INT32_C( 924258053), INT32_C( 594030571)) },
6161 { 1247989717, 798714331, -1727766974, 1099259705,
6162 simde_mm_set_epi32(INT32_C( 1247989717), INT32_C( 798714331), INT32_C(-1727766974), INT32_C( 1099259705)) },
6163 { 1870669627, 1775697551, -2027090738, -1897466045,
6164 simde_mm_set_epi32(INT32_C( 1870669627), INT32_C( 1775697551), INT32_C(-2027090738), INT32_C(-1897466045)) },
6165 { -584467290, 2134946541, 565373055, -212717620,
6166 simde_mm_set_epi32(INT32_C( -584467290), INT32_C( 2134946541), INT32_C( 565373055), INT32_C( -212717620)) },
6167 { 2072276971, 1968759191, 2049222745, 64876297,
6168 simde_mm_set_epi32(INT32_C( 2072276971), INT32_C( 1968759191), INT32_C( 2049222745), INT32_C( 64876297)) },
6169 { -285499155, -775226349, 1401270915, -476575867,
6170 simde_mm_set_epi32(INT32_C( -285499155), INT32_C( -775226349), INT32_C( 1401270915), INT32_C( -476575867)) },
6171 { -135350759, -1402535212, -799024597, 1171022108,
6172 simde_mm_set_epi32(INT32_C( -135350759), INT32_C(-1402535212), INT32_C( -799024597), INT32_C( 1171022108)) },
6173
6174 };
6175
6176 // printf("\n");
6177 // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
6178 // int32_t e3 = munit_rand_uint32();
6179 // int32_t e2 = munit_rand_uint32();
6180 // int32_t e1 = munit_rand_uint32();
6181 // int32_t e0 = munit_rand_uint32();
6182 // simde__m128i_private r;
6183
6184 // r = simde__m128i_to_private(simde_mm_set_epi32(e3, e2, e1, e0));
6185
6186 // printf(" { %11" PRId32 ", %11" PRId32 ", %11" PRId32 ", %11" PRId32 ",\n",
6187 // e3, e2, e1, e0);
6188 // printf(" simde_mm_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")) },\n",
6189 // r.i32[3], r.i32[2], r.i32[1], r.i32[0]);
6190 // }
6191 // return MUNIT_FAIL;
6192
6193 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6194 simde__m128i r = simde_mm_set_epi32(
6195 test_vec[i].e3, test_vec[i].e2, test_vec[i].e1, test_vec[i].e0);
6196 simde_assert_m128i_i32(r, ==, test_vec[i].r);
6197 }
6198
6199 return 0;
6200 }
6201
6202
6203 static int
6204 test_simde_mm_set_epi64(SIMDE_MUNIT_TEST_ARGS) {
6205 const struct {
6206 simde__m64 a;
6207 simde__m64 b;
6208 simde__m128i r;
6209 } test_vec[8] = {
6210 { simde_x_mm_set_pi64(INT64_C( -664890281848034973)),
6211 simde_x_mm_set_pi64(INT64_C(-2789670716680390611)),
6212 simde_mm_set_epi64x(INT64_C( -664890281848034973), INT64_C(-2789670716680390611)) },
6213 { simde_x_mm_set_pi64(INT64_C( 5148232775303872766)),
6214 simde_x_mm_set_pi64(INT64_C(-4313892930136448255)),
6215 simde_mm_set_epi64x(INT64_C( 5148232775303872766), INT64_C(-4313892930136448255)) },
6216 { simde_x_mm_set_pi64(INT64_C(-1888312870737326599)),
6217 simde_x_mm_set_pi64(INT64_C( 5248373813564878857)),
6218 simde_mm_set_epi64x(INT64_C(-1888312870737326599), INT64_C( 5248373813564878857)) },
6219 { simde_x_mm_set_pi64(INT64_C(-1560565807933837504)),
6220 simde_x_mm_set_pi64(INT64_C( 7268621988108136806)),
6221 simde_mm_set_epi64x(INT64_C(-1560565807933837504), INT64_C( 7268621988108136806)) },
6222 { simde_x_mm_set_pi64(INT64_C(-1956110667393926378)),
6223 simde_x_mm_set_pi64(INT64_C( 345154446382384077)),
6224 simde_mm_set_epi64x(INT64_C(-1956110667393926378), INT64_C( 345154446382384077)) },
6225 { simde_x_mm_set_pi64(INT64_C(-8505578167241709019)),
6226 simde_x_mm_set_pi64(INT64_C( 8252355195326597777)),
6227 simde_mm_set_epi64x(INT64_C(-8505578167241709019), INT64_C( 8252355195326597777)) },
6228 { simde_x_mm_set_pi64(INT64_C( 1122841158674863793)),
6229 simde_x_mm_set_pi64(INT64_C(-5697643761898453242)),
6230 simde_mm_set_epi64x(INT64_C( 1122841158674863793), INT64_C(-5697643761898453242)) },
6231 { simde_x_mm_set_pi64(INT64_C(-6130487997584440381)),
6232 simde_x_mm_set_pi64(INT64_C( 8349290391131198480)),
6233 simde_mm_set_epi64x(INT64_C(-6130487997584440381), INT64_C( 8349290391131198480)) }
6234 };
6235
6236 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6237 simde__m128i r = simde_mm_set_epi64(test_vec[i].a, test_vec[i].b);
6238 simde_assert_m128i_i64(r, ==, test_vec[i].r);
6239 }
6240
6241 return 0;
6242 }
6243 static int
6244 test_simde_mm_set_epi64x(SIMDE_MUNIT_TEST_ARGS) {
6245 const struct {
6246 int64_t e0;
6247 int64_t e1;
6248 simde__m128i r;
6249 } test_vec[8] = {
6250 { 4539993052502346892, 6550919315486945587,
6251 simde_mm_set_epi64x(INT64_C( 4539993052502346892), INT64_C( 6550919315486945587)) },
6252 { -8973439144672590874, 1846200258209621581,
6253 simde_mm_set_epi64x(INT64_C(-8973439144672590874), INT64_C( 1846200258209621581)) },
6254 { 771735515044186414, -5491872275643679405,
6255 simde_mm_set_epi64x(INT64_C( 771735515044186414), INT64_C(-5491872275643679405)) },
6256 { 3535609691698693035, -2659398015885158473,
6257 simde_mm_set_epi64x(INT64_C( 3535609691698693035), INT64_C(-2659398015885158473)) },
6258 { -5310489553719126486, -1326851720416490864,
6259 simde_mm_set_epi64x(INT64_C(-5310489553719126486), INT64_C(-1326851720416490864)) },
6260 { -1132069192689462333, -3126474808030937011,
6261 simde_mm_set_epi64x(INT64_C(-1132069192689462333), INT64_C(-3126474808030937011)) },
6262 { 3201360662826502659, 2894150994676591563,
6263 simde_mm_set_epi64x(INT64_C( 3201360662826502659), INT64_C( 2894150994676591563)) },
6264 { 5657213110111307867, 4054595932996548594,
6265 simde_mm_set_epi64x(INT64_C( 5657213110111307867), INT64_C( 4054595932996548594)) },
6266
6267 };
6268
6269 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6270 simde__m128i r = simde_mm_set_epi64x(test_vec[i].e0, test_vec[i].e1);
6271 simde_assert_m128i_i64(r, ==, test_vec[i].r);
6272 }
6273
6274 return 0;
6275 }
6276
6277 static int
6278 test_simde_mm_set1_epi8 (SIMDE_MUNIT_TEST_ARGS) {
6279 static const struct {
6280 const int8_t a;
6281 const int8_t r[16];
6282 } test_vec[] = {
6283 { -INT8_C( 57),
6284 { -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57),
6285 -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57) } },
6286 { -INT8_C( 62),
6287 { -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62),
6288 -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62), -INT8_C( 62) } },
6289 { -INT8_C( 94),
6290 { -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94),
6291 -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94), -INT8_C( 94) } },
6292 { -INT8_C( 11),
6293 { -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11),
6294 -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11), -INT8_C( 11) } },
6295 { -INT8_C( 57),
6296 { -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57),
6297 -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57), -INT8_C( 57) } },
6298 { INT8_C( 73),
6299 { INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73),
6300 INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73), INT8_C( 73) } },
6301 { INT8_C( 60),
6302 { INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60),
6303 INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60), INT8_C( 60) } },
6304 { -INT8_C( 6),
6305 { -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6),
6306 -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6), -INT8_C( 6) } }
6307 };
6308
6309 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
6310 simde__m128i r = simde_mm_set1_epi8(test_vec[i].a);
6311 simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r));
6312 }
6313
6314 return 0;
6315 }
6316
6317 static int
6318 test_simde_mm_set1_epi16(SIMDE_MUNIT_TEST_ARGS) {
6319 const struct {
6320 int16_t a;
6321 simde__m128i r;
6322 } test_vec[8] = {
6323 { -22932,
6324 simde_mm_set_epi16(INT16_C(-22932), INT16_C(-22932), INT16_C(-22932), INT16_C(-22932),
6325 INT16_C(-22932), INT16_C(-22932), INT16_C(-22932), INT16_C(-22932)) },
6326 { 23064,
6327 simde_mm_set_epi16(INT16_C( 23064), INT16_C( 23064), INT16_C( 23064), INT16_C( 23064),
6328 INT16_C( 23064), INT16_C( 23064), INT16_C( 23064), INT16_C( 23064)) },
6329 { 29063,
6330 simde_mm_set_epi16(INT16_C( 29063), INT16_C( 29063), INT16_C( 29063), INT16_C( 29063),
6331 INT16_C( 29063), INT16_C( 29063), INT16_C( 29063), INT16_C( 29063)) },
6332 { -6254,
6333 simde_mm_set_epi16(INT16_C( -6254), INT16_C( -6254), INT16_C( -6254), INT16_C( -6254),
6334 INT16_C( -6254), INT16_C( -6254), INT16_C( -6254), INT16_C( -6254)) },
6335 { 23328,
6336 simde_mm_set_epi16(INT16_C( 23328), INT16_C( 23328), INT16_C( 23328), INT16_C( 23328),
6337 INT16_C( 23328), INT16_C( 23328), INT16_C( 23328), INT16_C( 23328)) },
6338 { 12202,
6339 simde_mm_set_epi16(INT16_C( 12202), INT16_C( 12202), INT16_C( 12202), INT16_C( 12202),
6340 INT16_C( 12202), INT16_C( 12202), INT16_C( 12202), INT16_C( 12202)) },
6341 { 26711,
6342 simde_mm_set_epi16(INT16_C( 26711), INT16_C( 26711), INT16_C( 26711), INT16_C( 26711),
6343 INT16_C( 26711), INT16_C( 26711), INT16_C( 26711), INT16_C( 26711)) },
6344 { -9629,
6345 simde_mm_set_epi16(INT16_C( -9629), INT16_C( -9629), INT16_C( -9629), INT16_C( -9629),
6346 INT16_C( -9629), INT16_C( -9629), INT16_C( -9629), INT16_C( -9629)) },
6347 };
6348
6349 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6350 simde__m128i r = simde_mm_set1_epi16(test_vec[i].a);
6351 simde_assert_m128i_i16(r, ==, test_vec[i].r);
6352 }
6353
6354 return 0;
6355 }
6356
6357 static int
6358 test_simde_mm_set1_epi32 (SIMDE_MUNIT_TEST_ARGS) {
6359 static const struct {
6360 const int32_t a;
6361 const int32_t r[4];
6362 } test_vec[] = {
6363 { -INT32_C( 1379277210),
6364 { -INT32_C( 1379277210), -INT32_C( 1379277210), -INT32_C( 1379277210), -INT32_C( 1379277210) } },
6365 { INT32_C( 1628685468),
6366 { INT32_C( 1628685468), INT32_C( 1628685468), INT32_C( 1628685468), INT32_C( 1628685468) } },
6367 { INT32_C( 1687738541),
6368 { INT32_C( 1687738541), INT32_C( 1687738541), INT32_C( 1687738541), INT32_C( 1687738541) } },
6369 { INT32_C( 1891425133),
6370 { INT32_C( 1891425133), INT32_C( 1891425133), INT32_C( 1891425133), INT32_C( 1891425133) } },
6371 { INT32_C( 1695660386),
6372 { INT32_C( 1695660386), INT32_C( 1695660386), INT32_C( 1695660386), INT32_C( 1695660386) } },
6373 { INT32_C( 1846447439),
6374 { INT32_C( 1846447439), INT32_C( 1846447439), INT32_C( 1846447439), INT32_C( 1846447439) } },
6375 { INT32_C( 958687000),
6376 { INT32_C( 958687000), INT32_C( 958687000), INT32_C( 958687000), INT32_C( 958687000) } },
6377 { -INT32_C( 1238079408),
6378 { -INT32_C( 1238079408), -INT32_C( 1238079408), -INT32_C( 1238079408), -INT32_C( 1238079408) } }
6379 };
6380
6381 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
6382 simde__m128i r = simde_mm_set1_epi32(test_vec[i].a);
6383 simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
6384 }
6385
6386 return 0;
6387 }
6388
6389 static int
6390 test_simde_mm_set1_pd(SIMDE_MUNIT_TEST_ARGS) {
6391 const struct {
6392 simde_float64 a;
6393 simde__m128d r;
6394 } test_vec[8] = {
6395 { SIMDE_FLOAT64_C( 922.45),
6396 simde_mm_set_pd(SIMDE_FLOAT64_C( 922.45), SIMDE_FLOAT64_C( 922.45)) },
6397 { SIMDE_FLOAT64_C( -599.83),
6398 simde_mm_set_pd(SIMDE_FLOAT64_C( -599.83), SIMDE_FLOAT64_C( -599.83)) },
6399 { SIMDE_FLOAT64_C( -398.06),
6400 simde_mm_set_pd(SIMDE_FLOAT64_C( -398.06), SIMDE_FLOAT64_C( -398.06)) },
6401 { SIMDE_FLOAT64_C( 758.75),
6402 simde_mm_set_pd(SIMDE_FLOAT64_C( 758.75), SIMDE_FLOAT64_C( 758.75)) },
6403 { SIMDE_FLOAT64_C( -273.82),
6404 simde_mm_set_pd(SIMDE_FLOAT64_C( -273.82), SIMDE_FLOAT64_C( -273.82)) },
6405 { SIMDE_FLOAT64_C( -320.64),
6406 simde_mm_set_pd(SIMDE_FLOAT64_C( -320.64), SIMDE_FLOAT64_C( -320.64)) },
6407 { SIMDE_FLOAT64_C( 627.18),
6408 simde_mm_set_pd(SIMDE_FLOAT64_C( 627.18), SIMDE_FLOAT64_C( 627.18)) },
6409 { SIMDE_FLOAT64_C( 433.85),
6410 simde_mm_set_pd(SIMDE_FLOAT64_C( 433.85), SIMDE_FLOAT64_C( 433.85)) }
6411 };
6412
6413 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6414 simde__m128d r = simde_mm_set1_pd(test_vec[i].a);
6415 simde_assert_m128d_close(r, test_vec[i].r, 1);
6416 }
6417
6418 return 0;
6419 }
6420
6421 static int
6422 test_simde_mm_set_pd(SIMDE_MUNIT_TEST_ARGS) {
6423 const struct {
6424 double e0;
6425 double e1;
6426 simde__m128d r;
6427 } test_vec[8] = {
6428 { (1062807988.00), 4166063422.00,
6429 simde_mm_set_pd(SIMDE_FLOAT64_C(1062807988.00), SIMDE_FLOAT64_C(4166063422.00)) },
6430 { (4089462150.00), 3301875355.00,
6431 simde_mm_set_pd(SIMDE_FLOAT64_C(4089462150.00), SIMDE_FLOAT64_C(3301875355.00)) },
6432 { (2961047618.00), 1310362259.00,
6433 simde_mm_set_pd(SIMDE_FLOAT64_C(2961047618.00), SIMDE_FLOAT64_C(1310362259.00)) },
6434 { (491413403.00), 2980697460.00,
6435 simde_mm_set_pd(SIMDE_FLOAT64_C(491413403.00), SIMDE_FLOAT64_C(2980697460.00)) },
6436 { (3027292014.00), 1034055676.00,
6437 simde_mm_set_pd(SIMDE_FLOAT64_C(3027292014.00), SIMDE_FLOAT64_C(1034055676.00)) },
6438 { (133655993.00), 2416999239.00,
6439 simde_mm_set_pd(SIMDE_FLOAT64_C(133655993.00), SIMDE_FLOAT64_C(2416999239.00)) },
6440 { (2396615078.00), 517112175.00,
6441 simde_mm_set_pd(SIMDE_FLOAT64_C(2396615078.00), SIMDE_FLOAT64_C(517112175.00)) },
6442 { (628434760.00), 1544868779.00,
6443 simde_mm_set_pd(SIMDE_FLOAT64_C(628434760.00), SIMDE_FLOAT64_C(1544868779.00)) },
6444 };
6445
6446 // printf("\n");
6447 // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
6448 // double e0 = munit_rand_uint32() ;
6449 // double e1 = munit_rand_uint32() ;
6450 // simde__m128d_private r;
6451
6452 // r = simde__m128d_to_private(simde_mm_set_pd(e0, e1));
6453
6454 // printf(" { (%*.2f), %*.2f,\n", 8, e0, 8, e1);
6455 // printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n", 8, r.f64[1], 8, r.f64[0]);
6456 // }
6457 // return MUNIT_FAIL;
6458
6459 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6460 simde__m128d r = simde_mm_set_pd(test_vec[i].e0, test_vec[i].e1);
6461 simde_assert_m128d_close(r, test_vec[i].r, 1);
6462 }
6463
6464 return 0;
6465 }
6466
6467 static int
6468 test_simde_mm_set_pd1(SIMDE_MUNIT_TEST_ARGS) {
6469 const struct {
6470 double a;
6471 simde__m128d r;
6472 } test_vec[8] = {
6473 { (983122077.00),
6474 simde_mm_set_pd(SIMDE_FLOAT64_C(983122077.00), SIMDE_FLOAT64_C(983122077.00)) },
6475 { (2243688041.00),
6476 simde_mm_set_pd(SIMDE_FLOAT64_C(2243688041.00), SIMDE_FLOAT64_C(2243688041.00)) },
6477 { (1259032742.00),
6478 simde_mm_set_pd(SIMDE_FLOAT64_C(1259032742.00), SIMDE_FLOAT64_C(1259032742.00)) },
6479 { (945157531.00),
6480 simde_mm_set_pd(SIMDE_FLOAT64_C(945157531.00), SIMDE_FLOAT64_C(945157531.00)) },
6481 { (2547177525.00),
6482 simde_mm_set_pd(SIMDE_FLOAT64_C(2547177525.00), SIMDE_FLOAT64_C(2547177525.00)) },
6483 { (2112014239.00),
6484 simde_mm_set_pd(SIMDE_FLOAT64_C(2112014239.00), SIMDE_FLOAT64_C(2112014239.00)) },
6485 { (1570949017.00),
6486 simde_mm_set_pd(SIMDE_FLOAT64_C(1570949017.00), SIMDE_FLOAT64_C(1570949017.00)) },
6487 { (1215464208.00),
6488 simde_mm_set_pd(SIMDE_FLOAT64_C(1215464208.00), SIMDE_FLOAT64_C(1215464208.00)) },
6489 };
6490
6491 // printf("\n");
6492 // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
6493 // double a = munit_rand_uint32() ;
6494 // simde__m128d_private r;
6495
6496 // r = simde__m128d_to_private(simde_mm_set_pd1(a));
6497
6498 // printf(" { (%*.2f),\n", 8, a);
6499 // printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n", 8, r.f64[1], 8, r.f64[0]);
6500 // }
6501 // return MUNIT_FAIL;
6502
6503 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6504 simde__m128d r = simde_mm_set_pd1(test_vec[i].a);
6505 simde_assert_m128d_close(r, test_vec[i].r, 1);
6506 }
6507
6508 return 0;
6509 }
6510
6511
6512 static int
6513 test_simde_mm_set_sd(SIMDE_MUNIT_TEST_ARGS) {
6514 const struct {
6515 simde_float64 a;
6516 simde__m128d r;
6517 } test_vec[8] = {
6518 { SIMDE_FLOAT64_C( -222.00),
6519 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -222.00)) },
6520 { SIMDE_FLOAT64_C( 804.62),
6521 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 804.62)) },
6522 { SIMDE_FLOAT64_C( 845.92),
6523 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 845.92)) },
6524 { SIMDE_FLOAT64_C( 892.20),
6525 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 892.20)) },
6526 { SIMDE_FLOAT64_C( 233.47),
6527 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 233.47)) },
6528 { SIMDE_FLOAT64_C( -916.51),
6529 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -916.51)) },
6530 { SIMDE_FLOAT64_C( -0.11),
6531 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -0.11)) },
6532 { SIMDE_FLOAT64_C( -843.72),
6533 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -843.72)) }
6534 };
6535
6536 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6537 simde__m128d r = simde_mm_set_sd(test_vec[i].a);
6538 simde_assert_m128d_close(r, test_vec[i].r, 1);
6539 }
6540
6541 return 0;
6542 }
6543
6544 static int
6545 test_simde_mm_set1_epi64(SIMDE_MUNIT_TEST_ARGS) {
6546 const struct {
6547 simde__m64 a;
6548 simde__m128i r;
6549 } test_vec[8] = {
6550 { simde_x_mm_set_pi64(INT64_C(5509445936599134262)),
6551 simde_mm_set_epi64x(INT64_C(5509445936599134262), INT64_C(5509445936599134262)) },
6552 { simde_x_mm_set_pi64(INT64_C(6533321325309895597)),
6553 simde_mm_set_epi64x(INT64_C(6533321325309895597), INT64_C(6533321325309895597)) },
6554 { simde_x_mm_set_pi64(INT64_C(8570268616515205604)),
6555 simde_mm_set_epi64x(INT64_C(8570268616515205604), INT64_C(8570268616515205604)) },
6556 { simde_x_mm_set_pi64(INT64_C(6893954556242409981)),
6557 simde_mm_set_epi64x(INT64_C(6893954556242409981), INT64_C(6893954556242409981)) },
6558 { simde_x_mm_set_pi64(INT64_C( 479685313418970755)),
6559 simde_mm_set_epi64x(INT64_C( 479685313418970755), INT64_C( 479685313418970755)) },
6560 { simde_x_mm_set_pi64(INT64_C(1310625044422752521)),
6561 simde_mm_set_epi64x(INT64_C(1310625044422752521), INT64_C(1310625044422752521)) },
6562 { simde_x_mm_set_pi64(INT64_C(-9181800088333422881)),
6563 simde_mm_set_epi64x(INT64_C(-9181800088333422881), INT64_C(-9181800088333422881)) },
6564 { simde_x_mm_set_pi64(INT64_C(-4247659939651135559)),
6565 simde_mm_set_epi64x(INT64_C(-4247659939651135559), INT64_C(-4247659939651135559)) }
6566 };
6567
6568 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6569 simde__m128i r = simde_mm_set1_epi64(test_vec[i].a);
6570 simde_assert_m128i_i64(r, ==, test_vec[i].r);
6571 }
6572
6573 return 0;
6574 }
6575
6576 static int
6577 test_simde_mm_set1_epi64x(SIMDE_MUNIT_TEST_ARGS) {
6578 const struct {
6579 int64_t a;
6580 simde__m128i r;
6581 } test_vec[8] = {
6582 { INT64_C(-7342192307236287075),
6583 simde_mm_set_epi64x(INT64_C(-7342192307236287075), INT64_C(-7342192307236287075)) },
6584 { INT64_C(-8079223173243549940),
6585 simde_mm_set_epi64x(INT64_C(-8079223173243549940), INT64_C(-8079223173243549940)) },
6586 { INT64_C(8128959178680760661),
6587 simde_mm_set_epi64x(INT64_C(8128959178680760661), INT64_C(8128959178680760661)) },
6588 { INT64_C(6271233176655491948),
6589 simde_mm_set_epi64x(INT64_C(6271233176655491948), INT64_C(6271233176655491948)) },
6590 { INT64_C(3474926301195230116),
6591 simde_mm_set_epi64x(INT64_C(3474926301195230116), INT64_C(3474926301195230116)) },
6592 { INT64_C(-5217363481586450008),
6593 simde_mm_set_epi64x(INT64_C(-5217363481586450008), INT64_C(-5217363481586450008)) },
6594 { INT64_C(-7156667910834929798),
6595 simde_mm_set_epi64x(INT64_C(-7156667910834929798), INT64_C(-7156667910834929798)) },
6596 { INT64_C(8467790055770652882),
6597 simde_mm_set_epi64x(INT64_C(8467790055770652882), INT64_C(8467790055770652882)) }
6598 };
6599
6600 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6601 simde__m128i r = simde_mm_set1_epi64x(test_vec[i].a);
6602 simde_assert_m128i_i64(r, ==, test_vec[i].r);
6603 }
6604
6605 return 0;
6606 }
6607
6608 static int
6609 test_simde_mm_setr_epi8(SIMDE_MUNIT_TEST_ARGS) {
6610 const struct {
6611 int8_t a[16];
6612 simde__m128i r;
6613 } test_vec[8] = {
6614 { { -117, 101, -68, -84, 57, -16, 14, 112,
6615 -4, -62, 47, -17, 21, 25, -74, 93 },
6616 simde_mm_set_epi8(INT8_C( 93), INT8_C( -74), INT8_C( 25), INT8_C( 21), INT8_C( -17), INT8_C( 47), INT8_C( -62), INT8_C( -4),
6617 INT8_C( 112), INT8_C( 14), INT8_C( -16), INT8_C( 57), INT8_C( -84), INT8_C( -68), INT8_C( 101), INT8_C(-117)) },
6618 { { -121, -99, 93, 75, -45, 61, -29, 21,
6619 43, -55, -114, 120, 9, -25, 107, 106 },
6620 simde_mm_set_epi8(INT8_C( 106), INT8_C( 107), INT8_C( -25), INT8_C( 9), INT8_C( 120), INT8_C(-114), INT8_C( -55), INT8_C( 43),
6621 INT8_C( 21), INT8_C( -29), INT8_C( 61), INT8_C( -45), INT8_C( 75), INT8_C( 93), INT8_C( -99), INT8_C(-121)) },
6622 { { 17, 120, 33, -15, -38, -48, 75, -19,
6623 105, -73, -87, 91, 57, 125, 70, 11 },
6624 simde_mm_set_epi8(INT8_C( 11), INT8_C( 70), INT8_C( 125), INT8_C( 57), INT8_C( 91), INT8_C( -87), INT8_C( -73), INT8_C( 105),
6625 INT8_C( -19), INT8_C( 75), INT8_C( -48), INT8_C( -38), INT8_C( -15), INT8_C( 33), INT8_C( 120), INT8_C( 17)) },
6626 { { 56, -40, 93, 54, 0, -115, -62, 6,
6627 10, -58, -12, 31, -96, 67, 12, 19 },
6628 simde_mm_set_epi8(INT8_C( 19), INT8_C( 12), INT8_C( 67), INT8_C( -96), INT8_C( 31), INT8_C( -12), INT8_C( -58), INT8_C( 10),
6629 INT8_C( 6), INT8_C( -62), INT8_C(-115), INT8_C( 0), INT8_C( 54), INT8_C( 93), INT8_C( -40), INT8_C( 56)) },
6630 { { 37, -21, 96, -83, 46, -81, -51, -14,
6631 127, 26, -91, -48, 45, -55, -111, 109 },
6632 simde_mm_set_epi8(INT8_C( 109), INT8_C(-111), INT8_C( -55), INT8_C( 45), INT8_C( -48), INT8_C( -91), INT8_C( 26), INT8_C( 127),
6633 INT8_C( -14), INT8_C( -51), INT8_C( -81), INT8_C( 46), INT8_C( -83), INT8_C( 96), INT8_C( -21), INT8_C( 37)) },
6634 { { -77, 43, 114, -94, -36, -86, -18, 18,
6635 14, -4, 99, 78, 44, 70, 105, -91 },
6636 simde_mm_set_epi8(INT8_C( -91), INT8_C( 105), INT8_C( 70), INT8_C( 44), INT8_C( 78), INT8_C( 99), INT8_C( -4), INT8_C( 14),
6637 INT8_C( 18), INT8_C( -18), INT8_C( -86), INT8_C( -36), INT8_C( -94), INT8_C( 114), INT8_C( 43), INT8_C( -77)) },
6638 { { 125, -73, -25, -106, -9, 112, -96, 59,
6639 61, -50, 73, -71, 13, 0, -64, -15 },
6640 simde_mm_set_epi8(INT8_C( -15), INT8_C( -64), INT8_C( 0), INT8_C( 13), INT8_C( -71), INT8_C( 73), INT8_C( -50), INT8_C( 61),
6641 INT8_C( 59), INT8_C( -96), INT8_C( 112), INT8_C( -9), INT8_C(-106), INT8_C( -25), INT8_C( -73), INT8_C( 125)) },
6642 { { 76, 81, -62, 21, -3, 99, -61, 126,
6643 -15, -95, 99, -34, 78, 36, 56, -38 },
6644 simde_mm_set_epi8(INT8_C( -38), INT8_C( 56), INT8_C( 36), INT8_C( 78), INT8_C( -34), INT8_C( 99), INT8_C( -95), INT8_C( -15),
6645 INT8_C( 126), INT8_C( -61), INT8_C( 99), INT8_C( -3), INT8_C( 21), INT8_C( -62), INT8_C( 81), INT8_C( 76)) }
6646 };
6647
6648 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6649 simde__m128i r =
6650 simde_mm_setr_epi8(test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3],
6651 test_vec[i].a[ 4], test_vec[i].a[ 5], test_vec[i].a[ 6], test_vec[i].a[ 7],
6652 test_vec[i].a[ 8], test_vec[i].a[ 9], test_vec[i].a[10], test_vec[i].a[11],
6653 test_vec[i].a[12], test_vec[i].a[13], test_vec[i].a[14], test_vec[i].a[15]);
6654 simde_assert_m128i_i8(r, ==, test_vec[i].r);
6655 }
6656
6657 return 0;
6658 }
6659
6660 static int
6661 test_simde_mm_setr_epi16(SIMDE_MUNIT_TEST_ARGS) {
6662 const struct {
6663 int16_t a[8];
6664 simde__m128i r;
6665 } test_vec[8] = {
6666 { { -10562, -1563, 3119, 8148, -20473, 28066, 19911, 32415 },
6667 simde_mm_set_epi16(INT16_C( 32415), INT16_C( 19911), INT16_C( 28066), INT16_C(-20473),
6668 INT16_C( 8148), INT16_C( 3119), INT16_C( -1563), INT16_C(-10562)) },
6669 { { -5842, -19524, 19809, -4522, -18693, -13515, 10296, -11468 },
6670 simde_mm_set_epi16(INT16_C(-11468), INT16_C( 10296), INT16_C(-13515), INT16_C(-18693),
6671 INT16_C( -4522), INT16_C( 19809), INT16_C(-19524), INT16_C( -5842)) },
6672 { { 21973, -10968, -22468, 4564, 15035, 4920, 15286, 10966 },
6673 simde_mm_set_epi16(INT16_C( 10966), INT16_C( 15286), INT16_C( 4920), INT16_C( 15035),
6674 INT16_C( 4564), INT16_C(-22468), INT16_C(-10968), INT16_C( 21973)) },
6675 { { -30861, 17137, 12124, 23736, -1854, 30822, -26631, 14095 },
6676 simde_mm_set_epi16(INT16_C( 14095), INT16_C(-26631), INT16_C( 30822), INT16_C( -1854),
6677 INT16_C( 23736), INT16_C( 12124), INT16_C( 17137), INT16_C(-30861)) },
6678 { { -8301, -14416, -32194, -4341, 1212, 26290, -16654, -9801 },
6679 simde_mm_set_epi16(INT16_C( -9801), INT16_C(-16654), INT16_C( 26290), INT16_C( 1212),
6680 INT16_C( -4341), INT16_C(-32194), INT16_C(-14416), INT16_C( -8301)) },
6681 { { -5842, 17831, 171, 10031, 7446, 23430, -5408, -23387 },
6682 simde_mm_set_epi16(INT16_C(-23387), INT16_C( -5408), INT16_C( 23430), INT16_C( 7446),
6683 INT16_C( 10031), INT16_C( 171), INT16_C( 17831), INT16_C( -5842)) },
6684 { { 3343, -24774, -5050, 25934, -13848, 27661, 13484, -5817 },
6685 simde_mm_set_epi16(INT16_C( -5817), INT16_C( 13484), INT16_C( 27661), INT16_C(-13848),
6686 INT16_C( 25934), INT16_C( -5050), INT16_C(-24774), INT16_C( 3343)) },
6687 { { 27516, -24147, -18268, 10553, 12061, -22335, 29977, -25416 },
6688 simde_mm_set_epi16(INT16_C(-25416), INT16_C( 29977), INT16_C(-22335), INT16_C( 12061),
6689 INT16_C( 10553), INT16_C(-18268), INT16_C(-24147), INT16_C( 27516)) }
6690 };
6691
6692 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6693 simde__m128i r =
6694 simde_mm_setr_epi16(test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3],
6695 test_vec[i].a[ 4], test_vec[i].a[ 5], test_vec[i].a[ 6], test_vec[i].a[ 7]);
6696 simde_assert_m128i_i16(r, ==, test_vec[i].r);
6697 }
6698
6699 return 0;
6700 }
6701
6702 static int
6703 test_simde_mm_setr_epi32(SIMDE_MUNIT_TEST_ARGS) {
6704 const struct {
6705 int32_t a[4];
6706 simde__m128i r;
6707 } test_vec[8] = {
6708 { { INT32_C( 576930619), INT32_C(-1056617076), INT32_C( 1391020156), INT32_C( -119436850) },
6709 simde_mm_set_epi32(INT32_C( -119436850), INT32_C( 1391020156), INT32_C(-1056617076), INT32_C( 576930619)) },
6710 { { INT32_C(-2038323421), INT32_C(-1916700674), INT32_C( 1438851519), INT32_C( 1990196695) },
6711 simde_mm_set_epi32(INT32_C( 1990196695), INT32_C( 1438851519), INT32_C(-1916700674), INT32_C(-2038323421)) },
6712 { { INT32_C( 1146758814), INT32_C( 625179194), INT32_C(-1226824864), INT32_C(-1523319395) },
6713 simde_mm_set_epi32(INT32_C(-1523319395), INT32_C(-1226824864), INT32_C( 625179194), INT32_C( 1146758814)) },
6714 { { INT32_C( -276839793), INT32_C( 1178530072), INT32_C(-1956542830), INT32_C( -556652843) },
6715 simde_mm_set_epi32(INT32_C( -556652843), INT32_C(-1956542830), INT32_C( 1178530072), INT32_C( -276839793)) },
6716 { { INT32_C(-1720519476), INT32_C( 147115658), INT32_C( 736217848), INT32_C(-1149123643) },
6717 simde_mm_set_epi32(INT32_C(-1149123643), INT32_C( 736217848), INT32_C( 147115658), INT32_C(-1720519476)) },
6718 { { INT32_C( 1888725856), INT32_C( -696349459), INT32_C(-1872984731), INT32_C( 1198325431) },
6719 simde_mm_set_epi32(INT32_C( 1198325431), INT32_C(-1872984731), INT32_C( -696349459), INT32_C( 1888725856)) },
6720 { { INT32_C( 1999809110), INT32_C( -469856594), INT32_C(-1721902839), INT32_C(-1910021155) },
6721 simde_mm_set_epi32(INT32_C(-1910021155), INT32_C(-1721902839), INT32_C( -469856594), INT32_C( 1999809110)) },
6722 { { INT32_C( 57396463), INT32_C(-1219624618), INT32_C( -492678555), INT32_C(-1751286944) },
6723 simde_mm_set_epi32(INT32_C(-1751286944), INT32_C( -492678555), INT32_C(-1219624618), INT32_C( 57396463)) }
6724 };
6725
6726 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6727 simde__m128i r = simde_mm_setr_epi32(test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3]);
6728 simde_assert_m128i_i32(r, ==, test_vec[i].r);
6729 }
6730
6731 return 0;
6732 }
6733
6734 static int
6735 test_simde_mm_setr_epi64(SIMDE_MUNIT_TEST_ARGS) {
6736 const struct {
6737 simde__m64 e1;
6738 simde__m64 e0;
6739 simde__m128i r;
6740 } test_vec[8] = {
6741 { simde_mm_cvtsi64_m64(INT64_C(-4101257248168872649)),
6742 simde_mm_cvtsi64_m64(INT64_C(-2723834683478465794)),
6743 simde_mm_set_epi64x(INT64_C(-2723834683478465794), INT64_C(-4101257248168872649)) },
6744 { simde_mm_cvtsi64_m64(INT64_C(-2051996013747413745)),
6745 simde_mm_cvtsi64_m64(INT64_C(-3184937756541660331)),
6746 simde_mm_set_epi64x(INT64_C(-3184937756541660331), INT64_C(-2051996013747413745)) },
6747 { simde_mm_cvtsi64_m64(INT64_C(-1223296052051875883)),
6748 simde_mm_cvtsi64_m64(INT64_C( 3027248353112135930)),
6749 simde_mm_set_epi64x(INT64_C( 3027248353112135930), INT64_C(-1223296052051875883)) },
6750 { simde_mm_cvtsi64_m64(INT64_C(-8279962275226206621)),
6751 simde_mm_cvtsi64_m64(INT64_C(-2814925648380381958)),
6752 simde_mm_set_epi64x(INT64_C(-2814925648380381958), INT64_C(-8279962275226206621)) },
6753 { simde_mm_cvtsi64_m64(INT64_C( 6755033167475904984)),
6754 simde_mm_cvtsi64_m64(INT64_C(-8685825248847164354)),
6755 simde_mm_set_epi64x(INT64_C(-8685825248847164354), INT64_C( 6755033167475904984)) },
6756 { simde_mm_cvtsi64_m64(INT64_C( 1859833649283237251)),
6757 simde_mm_cvtsi64_m64(INT64_C( 4744285272371342192)),
6758 simde_mm_set_epi64x(INT64_C( 4744285272371342192), INT64_C( 1859833649283237251)) },
6759 { simde_mm_cvtsi64_m64(INT64_C(-2932310525767688549)),
6760 simde_mm_cvtsi64_m64(INT64_C(-5821145293930307405)),
6761 simde_mm_set_epi64x(INT64_C(-5821145293930307405), INT64_C(-2932310525767688549)) },
6762 { simde_mm_cvtsi64_m64(INT64_C( 6748921357249852483)),
6763 simde_mm_cvtsi64_m64(INT64_C(-4633625703225321444)),
6764 simde_mm_set_epi64x(INT64_C(-4633625703225321444), INT64_C( 6748921357249852483)) },
6765 };
6766
6767 // printf("\n");
6768 // for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6769 // simde__m64_private a, b;
6770 // simde__m128i_private r;
6771
6772 // munit_rand_memory(sizeof(a), (uint8_t*) &a);
6773 // munit_rand_memory(sizeof(b), (uint8_t*) &b);
6774
6775 // r = simde__m128i_to_private(simde_mm_setr_epi64(simde__m64_from_private(a), simde__m64_from_private(b)));
6776
6777 // printf(" { simde_mm_cvtsi64_m64(INT64_C(%20" PRId64 ")),\n", a.i64[0]);
6778 // printf(" simde_mm_cvtsi64_m64(INT64_C(%20" PRId64 ")),\n", b.i64[0]);
6779 // printf(" simde_mm_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")) },\n", r.i64[1], r.i64[0]);
6780 // }
6781 // return MUNIT_FAIL;
6782
6783 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6784 simde__m128i r = simde_mm_setr_epi64(test_vec[i].e1, test_vec[i].e0);
6785 simde_assert_m128i_i64(r, ==, test_vec[i].r);
6786 }
6787
6788 return 0;
6789 }
6790
6791 static int
6792 test_simde_mm_setzero_pd(SIMDE_MUNIT_TEST_ARGS) {
6793 simde__m128d a = simde_mm_set1_pd(0);
6794 simde__m128d r = simde_mm_setzero_pd();
6795
6796 simde_assert_m128d_equal(a, r);
6797
6798 return 0;
6799 }
6800
6801 static int
6802 test_simde_mm_setzero_si128(SIMDE_MUNIT_TEST_ARGS) {
6803 simde__m128i a = simde_mm_set1_epi32(0);
6804 simde__m128i r = simde_mm_setzero_si128();
6805
6806 simde_assert_m128i_i32(a, ==, r);
6807
6808 return 0;
6809 }
6810
6811 static int
6812 test_simde_mm_shuffle_epi32(SIMDE_MUNIT_TEST_ARGS) {
6813 const struct {
6814 simde__m128i a;
6815 simde__m128i r;
6816 } test_vec[8] = {
6817 { simde_mm_set_epi32(INT32_C( -749480461), INT32_C(-1872761030), INT32_C( 1690143325), INT32_C( -258848374)),
6818 simde_mm_set_epi32(INT32_C(-1872761030), INT32_C(-1872761030), INT32_C(-1872761030), INT32_C(-1872761030)) },
6819 { simde_mm_set_epi32(INT32_C( 1030695986), INT32_C( 1932252260), INT32_C( 1962976759), INT32_C(-1621624916)),
6820 simde_mm_set_epi32(INT32_C( 1932252260), INT32_C( 1932252260), INT32_C( 1932252260), INT32_C( 1932252260)) },
6821 { simde_mm_set_epi32(INT32_C( -897180326), INT32_C( 1675136548), INT32_C( 1746269378), INT32_C( 1984702409)),
6822 simde_mm_set_epi32(INT32_C( 1675136548), INT32_C( 1675136548), INT32_C( 1675136548), INT32_C( 1675136548)) },
6823 { simde_mm_set_epi32(INT32_C( -11612835), INT32_C(-1878653813), INT32_C(-2135957543), INT32_C( -134555953)),
6824 simde_mm_set_epi32(INT32_C(-1878653813), INT32_C(-1878653813), INT32_C(-1878653813), INT32_C(-1878653813)) },
6825 { simde_mm_set_epi32(INT32_C( 1051337342), INT32_C( 755742115), INT32_C( 338927136), INT32_C( 1410014436)),
6826 simde_mm_set_epi32(INT32_C( 755742115), INT32_C( 755742115), INT32_C( 755742115), INT32_C( 755742115)) },
6827 { simde_mm_set_epi32(INT32_C(-1826960183), INT32_C( -119444047), INT32_C(-1224980361), INT32_C( 1323381864)),
6828 simde_mm_set_epi32(INT32_C( -119444047), INT32_C( -119444047), INT32_C( -119444047), INT32_C( -119444047)) },
6829 { simde_mm_set_epi32(INT32_C( 1256541920), INT32_C( 1446192699), INT32_C( -117794523), INT32_C(-1904270778)),
6830 simde_mm_set_epi32(INT32_C( 1446192699), INT32_C( 1446192699), INT32_C( 1446192699), INT32_C( 1446192699)) },
6831 { simde_mm_set_epi32(INT32_C( 542509546), INT32_C(-1970305999), INT32_C(-1492486994), INT32_C( 1078541043)),
6832 simde_mm_set_epi32(INT32_C(-1970305999), INT32_C(-1970305999), INT32_C(-1970305999), INT32_C(-1970305999)) }
6833 };
6834
6835 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6836 simde__m128i r = simde_mm_shuffle_epi32(test_vec[i].a, 0xaa);
6837 simde_assert_m128i_i32(r, ==, test_vec[i].r);
6838 }
6839
6840 return 0;
6841 }
6842
6843 static int
6844 test_simde_mm_setr_pd(SIMDE_MUNIT_TEST_ARGS) {
6845 const struct {
6846 double e1;
6847 double e0;
6848 simde__m128d r;
6849 } test_vec[8] = {
6850 { 0.74, 0.57,
6851 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.57), SIMDE_FLOAT64_C( 0.74)) },
6852 { 0.52, 0.66,
6853 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.66), SIMDE_FLOAT64_C( 0.52)) },
6854 { 0.54, 0.56,
6855 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.56), SIMDE_FLOAT64_C( 0.54)) },
6856 { 0.95, 0.43,
6857 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.43), SIMDE_FLOAT64_C( 0.95)) },
6858 { 0.53, 0.46,
6859 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.46), SIMDE_FLOAT64_C( 0.53)) },
6860 { 0.33, 0.39,
6861 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.39), SIMDE_FLOAT64_C( 0.33)) },
6862 { 0.48, 0.63,
6863 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.63), SIMDE_FLOAT64_C( 0.48)) },
6864 { 0.08, 0.44,
6865 simde_mm_set_pd(SIMDE_FLOAT64_C( 0.44), SIMDE_FLOAT64_C( 0.08)) },
6866 };
6867
6868 // printf("\n");
6869 // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
6870 // double e1 = munit_rand_double();
6871 // double e0 = munit_rand_double();
6872 // simde__m128d_private r;
6873
6874 // r = simde__m128d_to_private(simde_mm_setr_pd(e1, e0));
6875
6876 // printf(" { %*.2f, %*.2f,\n", 8, e1 , 8, e0);
6877 // printf(" simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n", 8, r.f64[1], 8, r.f64[0]);
6878 // }
6879 // return MUNIT_FAIL;
6880
6881 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6882 simde__m128d r = simde_mm_setr_pd(test_vec[i].e1, test_vec[i].e0);
6883 simde_assert_m128d_close(r, test_vec[i].r, 1);
6884 }
6885
6886 return 0;
6887 }
6888
6889 static int
6890 test_simde_mm_shuffle_pd(SIMDE_MUNIT_TEST_ARGS) {
6891 const struct {
6892 simde__m128d a;
6893 simde__m128d b;
6894 simde__m128d r;
6895 } test_vec[8] = {
6896 { simde_mm_set_pd(SIMDE_FLOAT64_C( 897.05), SIMDE_FLOAT64_C( 524.15)),
6897 simde_mm_set_pd(SIMDE_FLOAT64_C( -346.39), SIMDE_FLOAT64_C( -595.93)),
6898 simde_mm_set_pd(SIMDE_FLOAT64_C( -595.93), SIMDE_FLOAT64_C( 524.15)) },
6899 { simde_mm_set_pd(SIMDE_FLOAT64_C( -684.88), SIMDE_FLOAT64_C( 62.45)),
6900 simde_mm_set_pd(SIMDE_FLOAT64_C( 765.70), SIMDE_FLOAT64_C( -126.52)),
6901 simde_mm_set_pd(SIMDE_FLOAT64_C( -126.52), SIMDE_FLOAT64_C( 62.45)) },
6902 { simde_mm_set_pd(SIMDE_FLOAT64_C( -871.69), SIMDE_FLOAT64_C( -753.55)),
6903 simde_mm_set_pd(SIMDE_FLOAT64_C( -923.31), SIMDE_FLOAT64_C( -103.97)),
6904 simde_mm_set_pd(SIMDE_FLOAT64_C( -103.97), SIMDE_FLOAT64_C( -753.55)) },
6905 { simde_mm_set_pd(SIMDE_FLOAT64_C( -377.03), SIMDE_FLOAT64_C( 701.23)),
6906 simde_mm_set_pd(SIMDE_FLOAT64_C( -672.47), SIMDE_FLOAT64_C( -328.63)),
6907 simde_mm_set_pd(SIMDE_FLOAT64_C( -328.63), SIMDE_FLOAT64_C( 701.23)) },
6908 { simde_mm_set_pd(SIMDE_FLOAT64_C( 238.70), SIMDE_FLOAT64_C( 837.56)),
6909 simde_mm_set_pd(SIMDE_FLOAT64_C( -429.19), SIMDE_FLOAT64_C( 106.67)),
6910 simde_mm_set_pd(SIMDE_FLOAT64_C( 106.67), SIMDE_FLOAT64_C( 837.56)) },
6911 { simde_mm_set_pd(SIMDE_FLOAT64_C( 571.83), SIMDE_FLOAT64_C( -389.51)),
6912 simde_mm_set_pd(SIMDE_FLOAT64_C( 447.48), SIMDE_FLOAT64_C( -8.02)),
6913 simde_mm_set_pd(SIMDE_FLOAT64_C( -8.02), SIMDE_FLOAT64_C( -389.51)) },
6914 { simde_mm_set_pd(SIMDE_FLOAT64_C( -214.27), SIMDE_FLOAT64_C( 549.07)),
6915 simde_mm_set_pd(SIMDE_FLOAT64_C( -967.02), SIMDE_FLOAT64_C( -162.29)),
6916 simde_mm_set_pd(SIMDE_FLOAT64_C( -162.29), SIMDE_FLOAT64_C( 549.07)) },
6917 { simde_mm_set_pd(SIMDE_FLOAT64_C( -528.33), SIMDE_FLOAT64_C( 376.34)),
6918 simde_mm_set_pd(SIMDE_FLOAT64_C( -959.95), SIMDE_FLOAT64_C( -855.93)),
6919 simde_mm_set_pd(SIMDE_FLOAT64_C( -855.93), SIMDE_FLOAT64_C( 376.34)) }
6920 };
6921
6922 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6923 simde__m128d r = simde_mm_shuffle_pd(test_vec[i].a, test_vec[i].b, 0);
6924 simde_assert_m128d_close(r, test_vec[i].r, 1);
6925 }
6926
6927 return 0;
6928 }
6929
6930 static int
6931 test_simde_mm_shufflehi_epi16(SIMDE_MUNIT_TEST_ARGS) {
6932 const struct {
6933 simde__m128i a;
6934 simde__m128i r;
6935 } test_vec[8] = {
6936 { simde_mm_set_epi16(INT16_C( 3588), INT16_C(-23598), INT16_C( -2669), INT16_C( -7880),
6937 INT16_C( 20391), INT16_C( 13327), INT16_C( 18868), INT16_C( 31239)),
6938 simde_mm_set_epi16(INT16_C( 3588), INT16_C( 3588), INT16_C( -2669), INT16_C( -2669),
6939 INT16_C( 20391), INT16_C( 13327), INT16_C( 18868), INT16_C( 31239)) },
6940 { simde_mm_set_epi16(INT16_C( 5701), INT16_C( 15357), INT16_C( 27973), INT16_C(-26447),
6941 INT16_C(-18797), INT16_C(-27249), INT16_C( -9707), INT16_C( -1950)),
6942 simde_mm_set_epi16(INT16_C( 5701), INT16_C( 5701), INT16_C( 27973), INT16_C( 27973),
6943 INT16_C(-18797), INT16_C(-27249), INT16_C( -9707), INT16_C( -1950)) },
6944 { simde_mm_set_epi16(INT16_C(-14544), INT16_C( 26887), INT16_C( -7591), INT16_C( 22567),
6945 INT16_C( -8366), INT16_C(-11381), INT16_C( 1736), INT16_C(-23069)),
6946 simde_mm_set_epi16(INT16_C(-14544), INT16_C(-14544), INT16_C( -7591), INT16_C( -7591),
6947 INT16_C( -8366), INT16_C(-11381), INT16_C( 1736), INT16_C(-23069)) },
6948 { simde_mm_set_epi16(INT16_C( 31637), INT16_C( 12965), INT16_C(-23234), INT16_C(-12784),
6949 INT16_C( 364), INT16_C( 7338), INT16_C( 16998), INT16_C(-14384)),
6950 simde_mm_set_epi16(INT16_C( 31637), INT16_C( 31637), INT16_C(-23234), INT16_C(-23234),
6951 INT16_C( 364), INT16_C( 7338), INT16_C( 16998), INT16_C(-14384)) },
6952 { simde_mm_set_epi16(INT16_C( 20104), INT16_C(-31033), INT16_C( 12782), INT16_C( -8281),
6953 INT16_C( 17249), INT16_C( -1757), INT16_C(-22510), INT16_C(-23902)),
6954 simde_mm_set_epi16(INT16_C( 20104), INT16_C( 20104), INT16_C( 12782), INT16_C( 12782),
6955 INT16_C( 17249), INT16_C( -1757), INT16_C(-22510), INT16_C(-23902)) },
6956 { simde_mm_set_epi16(INT16_C( 28403), INT16_C(-26721), INT16_C( -6834), INT16_C(-28104),
6957 INT16_C( -6404), INT16_C( -5723), INT16_C(-30154), INT16_C( -4442)),
6958 simde_mm_set_epi16(INT16_C( 28403), INT16_C( 28403), INT16_C( -6834), INT16_C( -6834),
6959 INT16_C( -6404), INT16_C( -5723), INT16_C(-30154), INT16_C( -4442)) },
6960 { simde_mm_set_epi16(INT16_C( 18671), INT16_C( -6207), INT16_C( 14078), INT16_C(-30976),
6961 INT16_C(-25644), INT16_C(-24126), INT16_C( 10939), INT16_C(-13801)),
6962 simde_mm_set_epi16(INT16_C( 18671), INT16_C( 18671), INT16_C( 14078), INT16_C( 14078),
6963 INT16_C(-25644), INT16_C(-24126), INT16_C( 10939), INT16_C(-13801)) },
6964 { simde_mm_set_epi16(INT16_C(-28546), INT16_C( 12696), INT16_C(-10401), INT16_C( -8517),
6965 INT16_C( 29702), INT16_C(-10694), INT16_C( 25940), INT16_C( 28112)),
6966 simde_mm_set_epi16(INT16_C(-28546), INT16_C(-28546), INT16_C(-10401), INT16_C(-10401),
6967 INT16_C( 29702), INT16_C(-10694), INT16_C( 25940), INT16_C( 28112)) }
6968 };
6969
6970 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6971 simde__m128i r = simde_mm_shufflehi_epi16(test_vec[i].a, 245);
6972 simde_assert_m128i_i16(r, ==, test_vec[i].r);
6973 }
6974
6975 return 0;
6976 }
6977
6978 static int
6979 test_simde_mm_shufflelo_epi16(SIMDE_MUNIT_TEST_ARGS) {
6980 simde__m128i a, r, e;
6981
6982 a = simde_mm_set_epi16(INT16_C(-24821), INT16_C(-30256), INT16_C( 8570), INT16_C( 11360),
6983 INT16_C(-20759), INT16_C(-23279), INT16_C( 9158), INT16_C( -6205));
6984 e = simde_mm_set_epi16(INT16_C(-24821), INT16_C(-30256), INT16_C( 8570), INT16_C( 11360),
6985 INT16_C( -6205), INT16_C(-23279), INT16_C( 9158), INT16_C(-20759));
6986 r = simde_mm_shufflelo_epi16(a, 39);
6987 simde_assert_m128i_i16(r, ==, e);
6988
6989 a = simde_mm_set_epi16(INT16_C(-26644), INT16_C( -8695), INT16_C( -9741), INT16_C(-14158),
6990 INT16_C( -3323), INT16_C( 7181), INT16_C( 10186), INT16_C(-16906));
6991 e = simde_mm_set_epi16(INT16_C(-26644), INT16_C( -8695), INT16_C( -9741), INT16_C(-14158),
6992 INT16_C( 7181), INT16_C(-16906), INT16_C( 10186), INT16_C( -3323));
6993 r = simde_mm_shufflelo_epi16(a, 135);
6994 simde_assert_m128i_i16(r, ==, e);
6995
6996 a = simde_mm_set_epi16(INT16_C(-20225), INT16_C( 19920), INT16_C( -3607), INT16_C( 11889),
6997 INT16_C( 12271), INT16_C(-20589), INT16_C( 17338), INT16_C( -7507));
6998 e = simde_mm_set_epi16(INT16_C(-20225), INT16_C( 19920), INT16_C( -3607), INT16_C( 11889),
6999 INT16_C( 12271), INT16_C( 17338), INT16_C( 12271), INT16_C( -7507));
7000 r = simde_mm_shufflelo_epi16(a, 220);
7001 simde_assert_m128i_i16(r, ==, e);
7002
7003 a = simde_mm_set_epi16(INT16_C( -8042), INT16_C(-18261), INT16_C( 20990), INT16_C(-18752),
7004 INT16_C( 26566), INT16_C(-27202), INT16_C( -3939), INT16_C( -1274));
7005 e = simde_mm_set_epi16(INT16_C( -8042), INT16_C(-18261), INT16_C( 20990), INT16_C(-18752),
7006 INT16_C( -3939), INT16_C( 26566), INT16_C( -3939), INT16_C( -1274));
7007 r = simde_mm_shufflelo_epi16(a, 116);
7008 simde_assert_m128i_i16(r, ==, e);
7009
7010 a = simde_mm_set_epi16(INT16_C( 5383), INT16_C(-27918), INT16_C( 16559), INT16_C(-31608),
7011 INT16_C( 6504), INT16_C(-11225), INT16_C(-13396), INT16_C( 20261));
7012 e = simde_mm_set_epi16(INT16_C( 5383), INT16_C(-27918), INT16_C( 16559), INT16_C(-31608),
7013 INT16_C(-13396), INT16_C(-13396), INT16_C(-13396), INT16_C(-13396));
7014 r = simde_mm_shufflelo_epi16(a, 85);
7015 simde_assert_m128i_i16(r, ==, e);
7016
7017 a = simde_mm_set_epi16(INT16_C( -8905), INT16_C( 30480), INT16_C( 20250), INT16_C( 30),
7018 INT16_C( 24188), INT16_C( 21861), INT16_C( -9955), INT16_C( 6282));
7019 e = simde_mm_set_epi16(INT16_C( -8905), INT16_C( 30480), INT16_C( 20250), INT16_C( 30),
7020 INT16_C( 6282), INT16_C( 24188), INT16_C( 21861), INT16_C( 21861));
7021 r = simde_mm_shufflelo_epi16(a, 58);
7022 simde_assert_m128i_i16(r, ==, e);
7023
7024 a = simde_mm_set_epi16(INT16_C( 7654), INT16_C( 4685), INT16_C( 25749), INT16_C(-30088),
7025 INT16_C( -7783), INT16_C( 10182), INT16_C( 23640), INT16_C( 4937));
7026 e = simde_mm_set_epi16(INT16_C( 7654), INT16_C( 4685), INT16_C( 25749), INT16_C(-30088),
7027 INT16_C( -7783), INT16_C( 4937), INT16_C( 23640), INT16_C( 4937));
7028 r = simde_mm_shufflelo_epi16(a, 196);
7029 simde_assert_m128i_i16(r, ==, e);
7030
7031 a = simde_mm_set_epi16(INT16_C(-26752), INT16_C( 9125), INT16_C(-14825), INT16_C( 13732),
7032 INT16_C( 15859), INT16_C(-32053), INT16_C(-12419), INT16_C( 17722));
7033 e = simde_mm_set_epi16(INT16_C(-26752), INT16_C( 9125), INT16_C(-14825), INT16_C( 13732),
7034 INT16_C( 15859), INT16_C( 17722), INT16_C(-12419), INT16_C(-32053));
7035 r = simde_mm_shufflelo_epi16(a, 198);
7036 simde_assert_m128i_i16(r, ==, e);
7037
7038 return 0;
7039 }
7040
7041 static int
7042 test_simde_mm_sra_epi16(SIMDE_MUNIT_TEST_ARGS) {
7043 const struct {
7044 simde__m128i a;
7045 simde__m128i count;
7046 simde__m128i r;
7047 } test_vec[8] = {
7048 { simde_mm_set_epi16(INT16_C( 28258), INT16_C( 1159), INT16_C( 20634), INT16_C(-30158),
7049 INT16_C( 10049), INT16_C(-31721), INT16_C(-26691), INT16_C(-28181)),
7050 simde_mm_set_epi16(INT16_C( 11), INT16_C( 6), INT16_C( 10), INT16_C( 8),
7051 INT16_C( 15), INT16_C( 3), INT16_C( 8), INT16_C( 1)),
7052 simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1),
7053 INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1)) },
7054 { simde_mm_set_epi16(INT16_C(-25682), INT16_C( 7964), INT16_C( 1259), INT16_C( 18017),
7055 INT16_C( 10765), INT16_C(-10649), INT16_C( -9400), INT16_C( 12110)),
7056 simde_mm_set_epi16(INT16_C( 3), INT16_C( 8), INT16_C( 3), INT16_C( 2),
7057 INT16_C( 10), INT16_C( 5), INT16_C( 2), INT16_C( 2)),
7058 simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0),
7059 INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0)) },
7060 { simde_mm_set_epi16(INT16_C(-24685), INT16_C( 14370), INT16_C( 13079), INT16_C( -6409),
7061 INT16_C(-18776), INT16_C( 20941), INT16_C( 22692), INT16_C( 312)),
7062 simde_mm_set_epi16(INT16_C( 13), INT16_C( 13), INT16_C( 0), INT16_C( 7),
7063 INT16_C( 4), INT16_C( 10), INT16_C( 15), INT16_C( 11)),
7064 simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1),
7065 INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
7066 { simde_mm_set_epi16(INT16_C( 13442), INT16_C(-32489), INT16_C(-21378), INT16_C( 10156),
7067 INT16_C( 15393), INT16_C( 20131), INT16_C( 15138), INT16_C(-12589)),
7068 simde_mm_set_epi16(INT16_C( 1), INT16_C( 3), INT16_C( 1), INT16_C( 8),
7069 INT16_C( 12), INT16_C( 3), INT16_C( 4), INT16_C( 2)),
7070 simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( 0),
7071 INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( -1)) },
7072 { simde_mm_set_epi16(INT16_C( -9561), INT16_C( 25554), INT16_C( -5305), INT16_C( -7173),
7073 INT16_C(-10064), INT16_C( 31075), INT16_C( 30218), INT16_C(-18929)),
7074 simde_mm_set_epi16(INT16_C( 5), INT16_C( 10), INT16_C( 8), INT16_C( 1),
7075 INT16_C( 12), INT16_C( 3), INT16_C( 10), INT16_C( 10)),
7076 simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1),
7077 INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1)) },
7078 { simde_mm_set_epi16(INT16_C( 14091), INT16_C( 24202), INT16_C( -8543), INT16_C( -7482),
7079 INT16_C(-26143), INT16_C( 20277), INT16_C(-27984), INT16_C(-32658)),
7080 simde_mm_set_epi16(INT16_C( 5), INT16_C( 7), INT16_C( 6), INT16_C( 6),
7081 INT16_C( 10), INT16_C( 3), INT16_C( 7), INT16_C( 11)),
7082 simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1),
7083 INT16_C( -1), INT16_C( 0), INT16_C( -1), INT16_C( -1)) },
7084 { simde_mm_set_epi16(INT16_C(-19726), INT16_C( 12311), INT16_C( 16279), INT16_C( -6277),
7085 INT16_C( 19874), INT16_C(-27089), INT16_C( 14524), INT16_C(-14305)),
7086 simde_mm_set_epi16(INT16_C( 7), INT16_C( 7), INT16_C( 3), INT16_C( 1),
7087 INT16_C( 12), INT16_C( 1), INT16_C( 10), INT16_C( 9)),
7088 simde_mm_set_epi16(INT16_C( -1), INT16_C( 0), INT16_C( 0), INT16_C( -1),
7089 INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( -1)) },
7090 { simde_mm_set_epi16(INT16_C( 19801), INT16_C(-12786), INT16_C( 31632), INT16_C( 19030),
7091 INT16_C(-19420), INT16_C(-12406), INT16_C( 12426), INT16_C( 27612)),
7092 simde_mm_set_epi16(INT16_C( 2), INT16_C( 0), INT16_C( 10), INT16_C( 3),
7093 INT16_C( 9), INT16_C( 0), INT16_C( 1), INT16_C( 8)),
7094 simde_mm_set_epi16(INT16_C( 0), INT16_C( -1), INT16_C( 0), INT16_C( 0),
7095 INT16_C( -1), INT16_C( -1), INT16_C( 0), INT16_C( 0)) }
7096 };
7097
7098 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7099 simde__m128i r = simde_mm_sra_epi16(test_vec[i].a, test_vec[i].count);
7100 simde_assert_m128i_i16(r, ==, test_vec[i].r);
7101 }
7102
7103 return 0;
7104 }
7105
7106 static int
7107 test_simde_mm_sll_epi16(SIMDE_MUNIT_TEST_ARGS) {
7108 const struct {
7109 simde__m128i a;
7110 simde__m128i count;
7111 simde__m128i r;
7112 } test_vec[] = {
7113 { simde_mm_set_epi16(INT16_C(-11777), INT16_C( 26803), INT16_C(-29366), INT16_C(-28135),
7114 INT16_C( 26578), INT16_C(-22566), INT16_C(-18521), INT16_C( -1087)),
7115 simde_mm_set_epi64x(INT64_C(-1766274549416496901), ~INT64_C(0)),
7116 simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0),
7117 INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
7118 { simde_mm_set_epi16(INT16_C( 20694), INT16_C(-29451), INT16_C(-14684), INT16_C( 26977),
7119 INT16_C( 9617), INT16_C( 4798), INT16_C( 6081), INT16_C( 26243)),
7120 simde_mm_set_epi64x(INT64_C(4317315664183993059), INT64_C( 0)),
7121 simde_mm_set_epi16(INT16_C( 20694), INT16_C(-29451), INT16_C(-14684), INT16_C( 26977),
7122 INT16_C( 9617), INT16_C( 4798), INT16_C( 6081), INT16_C( 26243)) },
7123 { simde_mm_set_epi16(INT16_C(-19602), INT16_C(-30869), INT16_C( -4506), INT16_C( 7721),
7124 INT16_C( 10990), INT16_C(-12116), INT16_C( 29998), INT16_C( -194)),
7125 simde_mm_set_epi64x(INT64_C(5323917981768999693), INT64_C( 1)),
7126 simde_mm_set_epi16(INT16_C( 26332), INT16_C( 3798), INT16_C( -9012), INT16_C( 15442),
7127 INT16_C( 21980), INT16_C(-24232), INT16_C( -5540), INT16_C( -388)) },
7128 { simde_mm_set_epi16(INT16_C( -7669), INT16_C(-27334), INT16_C( 24496), INT16_C( 27065),
7129 INT16_C( 13859), INT16_C( 2295), INT16_C( 31737), INT16_C( -2884)),
7130 simde_mm_set_epi64x(INT64_C(4743197663988711830), INT64_C( 2)),
7131 simde_mm_set_epi16(INT16_C(-30676), INT16_C( 21736), INT16_C( 32448), INT16_C(-22812),
7132 INT16_C(-10100), INT16_C( 9180), INT16_C( -4124), INT16_C(-11536)) },
7133 { simde_mm_set_epi16(INT16_C( -8360), INT16_C( 29662), INT16_C( 6226), INT16_C( 10396),
7134 INT16_C(-32749), INT16_C( 20802), INT16_C( 12391), INT16_C( 4472)),
7135 simde_mm_set_epi64x(INT64_C(-4440768506472940517), INT64_C( 3)),
7136 simde_mm_set_epi16(INT16_C( -1344), INT16_C(-24848), INT16_C(-15728), INT16_C( 17632),
7137 INT16_C( 152), INT16_C(-30192), INT16_C(-31944), INT16_C(-29760)) },
7138 { simde_mm_set_epi16(INT16_C( 26979), INT16_C( -773), INT16_C( 29656), INT16_C( 12973),
7139 INT16_C(-28581), INT16_C( -1290), INT16_C( 25294), INT16_C( -882)),
7140 simde_mm_set_epi64x(INT64_C(-8434753600973098893), INT64_C( 4)),
7141 simde_mm_set_epi16(INT16_C(-27088), INT16_C(-12368), INT16_C( 15744), INT16_C( 10960),
7142 INT16_C( 1456), INT16_C(-20640), INT16_C( 11488), INT16_C(-14112)) },
7143 { simde_mm_set_epi16(INT16_C(-20013), INT16_C( 14301), INT16_C(-17775), INT16_C(-12493),
7144 INT16_C(-22187), INT16_C( -2203), INT16_C( 22935), INT16_C( -5230)),
7145 simde_mm_set_epi64x(INT64_C(-718166367052449426), INT64_C( 13)),
7146 simde_mm_set_epi16(INT16_C( 24576), INT16_C(-24576), INT16_C( 8192), INT16_C( 24576),
7147 INT16_C(-24576), INT16_C(-24576), INT16_C( -8192), INT16_C( 16384)) },
7148 { simde_mm_set_epi16(INT16_C( -9377), INT16_C(-13109), INT16_C( 2614), INT16_C(-17099),
7149 INT16_C(-13260), INT16_C( 21790), INT16_C( 8183), INT16_C( 12820)),
7150 simde_mm_set_epi64x(INT64_C(-3082182550035776352), INT64_C( 14)),
7151 simde_mm_set_epi16(INT16_C(-16384), INT16_C(-16384), INT16_C(-32768), INT16_C( 16384),
7152 INT16_C( 0), INT16_C(-32768), INT16_C(-16384), INT16_C( 0)) },
7153 { simde_mm_set_epi16(INT16_C( 21339), INT16_C(-22944), INT16_C( 30792), INT16_C(-23288),
7154 INT16_C(-13340), INT16_C( 7657), INT16_C( 8339), INT16_C( 10093)),
7155 simde_mm_set_epi64x(INT64_C(-8360903661682410487), INT64_C( 15)),
7156 simde_mm_set_epi16(INT16_C(-32768), INT16_C( 0), INT16_C( 0), INT16_C( 0),
7157 INT16_C( 0), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) },
7158 { simde_mm_set_epi16(INT16_C(-12198), INT16_C( 1510), INT16_C( -3241), INT16_C(-10552),
7159 INT16_C(-10041), INT16_C( 23083), INT16_C( 11931), INT16_C( 10037)),
7160 simde_mm_set_epi64x(INT64_C(7382630779200792207), INT64_C( 16)),
7161 simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0),
7162 INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) },
7163 { simde_mm_set_epi16(INT16_C( -4565), INT16_C(-19321), INT16_C( 29437), INT16_C( -8916),
7164 INT16_C( 18870), INT16_C(-29403), INT16_C( 667), INT16_C(-22848)),
7165 simde_mm_set_epi64x(INT64_C( 7403670930710815), INT64_C( 17)),
7166 simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0),
7167 INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0)) }
7168 };
7169
7170 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7171 simde__m128i r = simde_mm_sll_epi16(test_vec[i].a, test_vec[i].count);
7172 simde_assert_m128i_i16(r, ==, test_vec[i].r);
7173 }
7174
7175 return 0;
7176 }
7177
7178 static int
7179 test_simde_mm_sll_epi32(SIMDE_MUNIT_TEST_ARGS) {
7180 const struct {
7181 simde__m128i a;
7182 simde__m128i count;
7183 simde__m128i r;
7184 } test_vec[50] = {
7185 { simde_mm_set_epi32(INT32_C( 1847585989), INT32_C( -535718080), INT32_C(-1279093253), INT32_C( 656800013)),
7186 simde_x_mm_set_epu64x(UINT64_C( 2450913859380011969), UINT64_C(18446744073709551615)),
7187 simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
7188 { simde_mm_set_epi32(INT32_C( 894927109), INT32_C( 930856884), INT32_C( 350764320), INT32_C( 435252602)),
7189 simde_x_mm_set_epu64x(UINT64_C( 4964670149549210828), UINT64_C( 0)),
7190 simde_mm_set_epi32(INT32_C( 894927109), INT32_C( 930856884), INT32_C( 350764320), INT32_C( 435252602)) },
7191 { simde_mm_set_epi32(INT32_C( -264520264), INT32_C( 2022498436), INT32_C(-1437067245), INT32_C( 482847980)),
7192 simde_x_mm_set_epu64x(UINT64_C( 7326459959939805716), UINT64_C( 1)),
7193 simde_mm_set_epi32(INT32_C( -529040528), INT32_C( -249970424), INT32_C( 1420832806), INT32_C( 965695960)) },
7194 { simde_mm_set_epi32(INT32_C( -73269821), INT32_C(-1137239147), INT32_C( 168132057), INT32_C( -131743227)),
7195 simde_x_mm_set_epu64x(UINT64_C( 1477135654656320870), UINT64_C( 2)),
7196 simde_mm_set_epi32(INT32_C( -293079284), INT32_C( -253989292), INT32_C( 672528228), INT32_C( -526972908)) },
7197 { simde_mm_set_epi32(INT32_C( 676475770), INT32_C( 743649739), INT32_C( 1613393787), INT32_C( 257685631)),
7198 simde_x_mm_set_epu64x(UINT64_C(14989079754060836033), UINT64_C( 3)),
7199 simde_mm_set_epi32(INT32_C( 1116838864), INT32_C( 1654230616), INT32_C( 22248408), INT32_C( 2061485048)) },
7200 { simde_mm_set_epi32(INT32_C( 1293905571), INT32_C(-1134008712), INT32_C(-1835354706), INT32_C( -173430307)),
7201 simde_x_mm_set_epu64x(UINT64_C(15716033284919086785), UINT64_C( 29)),
7202 simde_mm_set_epi32(INT32_C( 1610612736), INT32_C( 0), INT32_C(-1073741824), INT32_C(-1610612736)) },
7203 { simde_mm_set_epi32(INT32_C(-1608827194), INT32_C( -758406839), INT32_C(-1895836042), INT32_C(-1122971027)),
7204 simde_x_mm_set_epu64x(UINT64_C( 240001894519477005), UINT64_C( 30)),
7205 simde_mm_set_epi32( INT32_MIN , INT32_C( 1073741824), INT32_MIN , INT32_C( 1073741824)) },
7206 { simde_mm_set_epi32(INT32_C( 1629035853), INT32_C( 172553194), INT32_C( 533866060), INT32_C( 504662481)),
7207 simde_x_mm_set_epu64x(UINT64_C(16117634661514065169), UINT64_C( 31)),
7208 simde_mm_set_epi32( INT32_MIN , INT32_C( 0), INT32_C( 0), INT32_MIN ) },
7209 { simde_mm_set_epi32(INT32_C(-1841013582), INT32_C(-1759681954), INT32_C(-1933278842), INT32_C( 1138123852)),
7210 simde_x_mm_set_epu64x(UINT64_C(16122278597987411920), UINT64_C( 32)),
7211 simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
7212 { simde_mm_set_epi32(INT32_C( 1016164793), INT32_C( 934378122), INT32_C( 1851284098), INT32_C( 118468072)),
7213 simde_x_mm_set_epu64x(UINT64_C( 9847102169886565139), UINT64_C( 33)),
7214 simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }
7215 };
7216
7217 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7218 simde__m128i r = simde_mm_sll_epi32(test_vec[i].a, test_vec[i].count);
7219 simde_assert_m128i_i32(r, ==, test_vec[i].r);
7220 }
7221
7222 return 0;
7223 }
7224
7225 static int
7226 test_simde_mm_sll_epi64(SIMDE_MUNIT_TEST_ARGS) {
7227 const struct {
7228 simde__m128i a;
7229 simde__m128i count;
7230 simde__m128i r;
7231 } test_vec[8] = {
7232 { simde_mm_set_epi64x(INT64_C( 8055788268748421105), INT64_C(-9066834056558614160)),
7233 simde_mm_set_epi64x(INT64_C(-5262793264663215472), INT64_C(16)),
7234 simde_mm_set_epi64x(INT64_C(-1675408870841712640), INT64_C( 2483371706739064832)) },
7235 { simde_mm_set_epi64x(INT64_C( 2441732847819780871), INT64_C( -124127278813603777)),
7236 simde_mm_set_epi64x(INT64_C(-8018169735231443299), INT64_C( 1)),
7237 simde_mm_set_epi64x(INT64_C( 4883465695639561742), INT64_C( -248254557627207554)) },
7238 { simde_mm_set_epi64x(INT64_C(-2211386688605493428), INT64_C( -350563182553241755)),
7239 simde_mm_set_epi64x(INT64_C( 1150552132815785095), INT64_C(12)),
7240 simde_mm_set_epi64x(INT64_C( -488536336711237632), INT64_C( 2939242011266797568)) },
7241 { simde_mm_set_epi64x(INT64_C( 2987527187015640759), INT64_C( 638426944527652749)),
7242 simde_mm_set_epi64x(INT64_C(-1714103729784977145), INT64_C( 5)),
7243 simde_mm_set_epi64x(INT64_C( 3367149615952746208), INT64_C( 1982918151175336352)) },
7244 { simde_mm_set_epi64x(INT64_C( 4972525455608644218), INT64_C( 6137457836149854777)),
7245 simde_mm_set_epi64x(INT64_C(-8922909725876665702), INT64_C( 2)),
7246 simde_mm_set_epi64x(INT64_C( 1443357748725025256), INT64_C( 6103087270889867492)) },
7247 { simde_mm_set_epi64x(INT64_C(-6484089245702098359), INT64_C( 413459708861121590)),
7248 simde_mm_set_epi64x(INT64_C( 7011241116916112587), INT64_C(15)),
7249 simde_mm_set_epi64x(INT64_C(-1038162179743514624), INT64_C( 8337589858421374976)) },
7250 { simde_mm_set_epi64x(INT64_C(-1797418312522800237), INT64_C( 3481510514608785630)),
7251 simde_mm_set_epi64x(INT64_C( 4951339001913100627), INT64_C(13)),
7252 simde_mm_set_epi64x(INT64_C(-3949045366557351936), INT64_C( 1867797720205082624)) },
7253 { simde_mm_set_epi64x(INT64_C( 7626804351806608498), INT64_C(-4244380112569402483)),
7254 simde_mm_set_epi64x(INT64_C( 1577848631857250403), INT64_C( 4)),
7255 simde_mm_set_epi64x(INT64_C(-7098338887061125344), INT64_C( 5876894493727766736)) }
7256 };
7257
7258 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7259 simde__m128i r = simde_mm_sll_epi64(test_vec[i].a, test_vec[i].count);
7260 simde_assert_m128i_i64(r, ==, test_vec[i].r);
7261 }
7262
7263 return 0;
7264 }
7265
7266 static int
7267 test_simde_mm_sqrt_pd (SIMDE_MUNIT_TEST_ARGS) {
7268 static const struct {
7269 const simde_float64 a[2];
7270 const simde_float64 r[2];
7271 } test_vec[] = {
7272 { { SIMDE_FLOAT64_C( 481.04), SIMDE_FLOAT64_C( 845.64) },
7273 { SIMDE_FLOAT64_C( 21.93), SIMDE_FLOAT64_C( 29.08) } },
7274 { { SIMDE_FLOAT64_C( 520.60), SIMDE_FLOAT64_C( 759.12) },
7275 { SIMDE_FLOAT64_C( 22.82), SIMDE_FLOAT64_C( 27.55) } },
7276 { { SIMDE_FLOAT64_C( 35.64), SIMDE_FLOAT64_C( 486.89) },
7277 { SIMDE_FLOAT64_C( 5.97), SIMDE_FLOAT64_C( 22.07) } },
7278 { { SIMDE_FLOAT64_C( -79.78), SIMDE_FLOAT64_C( 723.70) },
7279 { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 26.90) } },
7280 { { SIMDE_FLOAT64_C( 719.24), SIMDE_FLOAT64_C( 373.08) },
7281 { SIMDE_FLOAT64_C( 26.82), SIMDE_FLOAT64_C( 19.32) } },
7282 { { SIMDE_FLOAT64_C( 497.67), SIMDE_FLOAT64_C( 489.69) },
7283 { SIMDE_FLOAT64_C( 22.31), SIMDE_FLOAT64_C( 22.13) } },
7284 { { SIMDE_FLOAT64_C( 925.51), SIMDE_FLOAT64_C( 932.27) },
7285 { SIMDE_FLOAT64_C( 30.42), SIMDE_FLOAT64_C( 30.53) } },
7286 { { SIMDE_FLOAT64_C( -49.82), SIMDE_FLOAT64_C( 705.12) },
7287 { SIMDE_MATH_NAN, SIMDE_FLOAT64_C( 26.55) } }
7288 };
7289
7290 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
7291 simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
7292 simde__m128d r = simde_mm_sqrt_pd(a);
7293 simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1);
7294 }
7295
7296 return 0;
7297 }
7298
7299 static int
7300 test_simde_mm_sqrt_sd(SIMDE_MUNIT_TEST_ARGS) {
7301 const struct {
7302 simde__m128d a;
7303 simde__m128d b;
7304 simde__m128d r;
7305 } test_vec[8] = {
7306 { simde_mm_set_pd(SIMDE_FLOAT64_C( 1.82), SIMDE_FLOAT64_C( 868.47)),
7307 simde_mm_set_pd(SIMDE_FLOAT64_C( 180.11), SIMDE_FLOAT64_C( 621.52)),
7308 simde_mm_set_pd(SIMDE_FLOAT64_C( 1.82), SIMDE_FLOAT64_C( 24.93)) },
7309 { simde_mm_set_pd(SIMDE_FLOAT64_C( 458.20), SIMDE_FLOAT64_C( 211.55)),
7310 simde_mm_set_pd(SIMDE_FLOAT64_C( 430.02), SIMDE_FLOAT64_C( 152.28)),
7311 simde_mm_set_pd(SIMDE_FLOAT64_C( 458.20), SIMDE_FLOAT64_C( 12.34)) },
7312 { simde_mm_set_pd(SIMDE_FLOAT64_C( 790.70), SIMDE_FLOAT64_C( 272.49)),
7313 simde_mm_set_pd(SIMDE_FLOAT64_C( 882.78), SIMDE_FLOAT64_C( 929.30)),
7314 simde_mm_set_pd(SIMDE_FLOAT64_C( 790.70), SIMDE_FLOAT64_C( 30.48)) },
7315 { simde_mm_set_pd(SIMDE_FLOAT64_C( 103.00), SIMDE_FLOAT64_C( 65.43)),
7316 simde_mm_set_pd(SIMDE_FLOAT64_C( 542.46), SIMDE_FLOAT64_C( 784.04)),
7317 simde_mm_set_pd(SIMDE_FLOAT64_C( 103.00), SIMDE_FLOAT64_C( 28.00)) },
7318 { simde_mm_set_pd(SIMDE_FLOAT64_C( 373.53), SIMDE_FLOAT64_C( 698.61)),
7319 simde_mm_set_pd(SIMDE_FLOAT64_C( 142.54), SIMDE_FLOAT64_C( 348.23)),
7320 simde_mm_set_pd(SIMDE_FLOAT64_C( 373.53), SIMDE_FLOAT64_C( 18.66)) },
7321 { simde_mm_set_pd(SIMDE_FLOAT64_C( 528.07), SIMDE_FLOAT64_C( 477.87)),
7322 simde_mm_set_pd(SIMDE_FLOAT64_C( 384.87), SIMDE_FLOAT64_C( 433.33)),
7323 simde_mm_set_pd(SIMDE_FLOAT64_C( 528.07), SIMDE_FLOAT64_C( 20.82)) },
7324 { simde_mm_set_pd(SIMDE_FLOAT64_C( 241.09), SIMDE_FLOAT64_C( 679.09)),
7325 simde_mm_set_pd(SIMDE_FLOAT64_C( 322.35), SIMDE_FLOAT64_C( 620.04)),
7326 simde_mm_set_pd(SIMDE_FLOAT64_C( 241.09), SIMDE_FLOAT64_C( 24.90)) },
7327 { simde_mm_set_pd(SIMDE_FLOAT64_C( 651.18), SIMDE_FLOAT64_C( 446.59)),
7328 simde_mm_set_pd(SIMDE_FLOAT64_C( 886.36), SIMDE_FLOAT64_C( 269.28)),
7329 simde_mm_set_pd(SIMDE_FLOAT64_C( 651.18), SIMDE_FLOAT64_C( 16.41)) }
7330 };
7331
7332 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7333 simde__m128d r = simde_mm_sqrt_sd(test_vec[i].a, test_vec[i].b);
7334 simde_assert_m128d_close(r, test_vec[i].r, 1);
7335 }
7336
7337 return 0;
7338 }
7339
7340 static int
7341 test_simde_mm_srl_epi16(SIMDE_MUNIT_TEST_ARGS) {
7342 const struct {
7343 simde__m128i a;
7344 simde__m128i count;
7345 simde__m128i r;
7346 } test_vec[8] = {
7347 { simde_mm_set_epi16(INT16_C( 1445), INT16_C( 14472), INT16_C(-18508), INT16_C( -4645),
7348 INT16_C(-24581), INT16_C(-12656), INT16_C( 1275), INT16_C(-25245)),
7349 simde_mm_set_epi64x(INT64_C( 4678230141678036905), INT64_C( 4)),
7350 simde_mm_set_epi16(INT16_C( 90), INT16_C( 904), INT16_C( 2939), INT16_C( 3805),
7351 INT16_C( 2559), INT16_C( 3305), INT16_C( 79), INT16_C( 2518)) },
7352 { simde_mm_set_epi16(INT16_C( 986), INT16_C( 31796), INT16_C(-12770), INT16_C(-28401),
7353 INT16_C( 15186), INT16_C(-17595), INT16_C( 31992), INT16_C( 19329)),
7354 simde_mm_set_epi64x(INT64_C( 234386534661459961), INT64_C( 2)),
7355 simde_mm_set_epi16(INT16_C( 246), INT16_C( 7949), INT16_C( 13191), INT16_C( 9283),
7356 INT16_C( 3796), INT16_C( 11985), INT16_C( 7998), INT16_C( 4832)) },
7357 { simde_mm_set_epi16(INT16_C(-23898), INT16_C( 7158), INT16_C( 21829), INT16_C(-16536),
7358 INT16_C( 2052), INT16_C( -6635), INT16_C( 18408), INT16_C( -3755)),
7359 simde_mm_set_epi64x(INT64_C( 8276161762185938564), INT64_C( 7)),
7360 simde_mm_set_epi16(INT16_C( 325), INT16_C( 55), INT16_C( 170), INT16_C( 382),
7361 INT16_C( 16), INT16_C( 460), INT16_C( 143), INT16_C( 482)) },
7362 { simde_mm_set_epi16(INT16_C(-19513), INT16_C(-10508), INT16_C(-12500), INT16_C( 22379),
7363 INT16_C( 4775), INT16_C( 8063), INT16_C( 8132), INT16_C( 7840)),
7364 simde_mm_set_epi64x(INT64_C( 1101003055866698034), INT64_C( 6)),
7365 simde_mm_set_epi16(INT16_C( 719), INT16_C( 859), INT16_C( 828), INT16_C( 349),
7366 INT16_C( 74), INT16_C( 125), INT16_C( 127), INT16_C( 122)) },
7367 { simde_mm_set_epi16(INT16_C( 9942), INT16_C( 29561), INT16_C( -4121), INT16_C(-26882),
7368 INT16_C(-17939), INT16_C( 13186), INT16_C( 6796), INT16_C( 14206)),
7369 simde_mm_set_epi64x(INT64_C( 735258903315099979), INT64_C( 1)),
7370 simde_mm_set_epi16(INT16_C( 4971), INT16_C( 14780), INT16_C( 30707), INT16_C( 19327),
7371 INT16_C( 23798), INT16_C( 6593), INT16_C( 3398), INT16_C( 7103)) },
7372 { simde_mm_set_epi16(INT16_C( 5648), INT16_C(-13469), INT16_C(-23201), INT16_C( 7029),
7373 INT16_C(-28211), INT16_C(-14496), INT16_C( 31202), INT16_C(-32095)),
7374 simde_mm_set_epi64x(INT64_C( 4870695400140482879), INT64_C(13)),
7375 simde_mm_set_epi16(INT16_C( 0), INT16_C( 6), INT16_C( 5), INT16_C( 0),
7376 INT16_C( 4), INT16_C( 6), INT16_C( 3), INT16_C( 4)) },
7377 { simde_mm_set_epi16(INT16_C( 11526), INT16_C( 20336), INT16_C( 18003), INT16_C( 21727),
7378 INT16_C(-28471), INT16_C(-32732), INT16_C(-25472), INT16_C( 12636)),
7379 simde_mm_set_epi64x(INT64_C(-6737308052137237000), INT64_C( 3)),
7380 simde_mm_set_epi16(INT16_C( 1440), INT16_C( 2542), INT16_C( 2250), INT16_C( 2715),
7381 INT16_C( 4633), INT16_C( 4100), INT16_C( 5008), INT16_C( 1579)) },
7382 { simde_mm_set_epi16(INT16_C(-30386), INT16_C( -2761), INT16_C( 11467), INT16_C( 9929),
7383 INT16_C(-19380), INT16_C(-12818), INT16_C( -4584), INT16_C( -6145)),
7384 simde_mm_set_epi64x(INT64_C(-2450775638354168945), INT64_C( 3)),
7385 simde_mm_set_epi16(INT16_C( 4393), INT16_C( 7846), INT16_C( 1433), INT16_C( 1241),
7386 INT16_C( 5769), INT16_C( 6589), INT16_C( 7619), INT16_C( 7423)) }
7387 };
7388
7389 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7390 simde__m128i r = simde_mm_srl_epi16(test_vec[i].a, test_vec[i].count);
7391 simde_assert_m128i_i16(r, ==, test_vec[i].r);
7392 }
7393
7394 return 0;
7395 }
7396
7397 static int
7398 test_simde_mm_srl_epi32(SIMDE_MUNIT_TEST_ARGS) {
7399 const struct {
7400 simde__m128i a;
7401 simde__m128i count;
7402 simde__m128i r;
7403 } test_vec[8] = {
7404 { simde_mm_set_epi32(INT32_C( 94713992), INT32_C(-1212879397), INT32_C(-1610887536), INT32_C( 83598691)),
7405 simde_mm_set_epi64x(INT64_C( 4678230141678036905), INT64_C( 4)),
7406 simde_mm_set_epi32(INT32_C( 5919624), INT32_C( 192630493), INT32_C( 167754985), INT32_C( 5224918)) },
7407 { simde_mm_set_epi32(INT32_C( 64650292), INT32_C( -836857585), INT32_C( 995277637), INT32_C( 2096647041)),
7408 simde_mm_set_epi64x(INT64_C( 234386534661459961), INT64_C( 2)),
7409 simde_mm_set_epi32(INT32_C( 16162573), INT32_C( 864527427), INT32_C( 248819409), INT32_C( 524161760)) },
7410 { simde_mm_set_epi32(INT32_C(-1566172170), INT32_C( 1430634344), INT32_C( 134538773), INT32_C( 1206448469)),
7411 simde_mm_set_epi64x(INT64_C( 8276161762185938564), INT64_C( 7)),
7412 simde_mm_set_epi32(INT32_C( 21318711), INT32_C( 11176830), INT32_C( 1051084), INT32_C( 9425378)) },
7413 { simde_mm_set_epi32(INT32_C(-1278748940), INT32_C( -819177621), INT32_C( 312942463), INT32_C( 532946592)),
7414 simde_mm_set_epi64x(INT64_C( 1101003055866698034), INT64_C( 6)),
7415 simde_mm_set_epi32(INT32_C( 47128411), INT32_C( 54309213), INT32_C( 4889725), INT32_C( 8327290)) },
7416 { simde_mm_set_epi32(INT32_C( 651588473), INT32_C( -270035202), INT32_C(-1175637118), INT32_C( 445396862)),
7417 simde_mm_set_epi64x(INT64_C( 735258903315099979), INT64_C( 1)),
7418 simde_mm_set_epi32(INT32_C( 325794236), INT32_C( 2012466047), INT32_C( 1559665089), INT32_C( 222698431)) },
7419 { simde_mm_set_epi32(INT32_C( 370199395), INT32_C(-1520493707), INT32_C(-1848785056), INT32_C( 2044887713)),
7420 simde_mm_set_epi64x(INT64_C( 4870695400140482879), INT64_C(13)),
7421 simde_mm_set_epi32(INT32_C( 45190), INT32_C( 338680), INT32_C( 298606), INT32_C( 249620)) },
7422 { simde_mm_set_epi32(INT32_C( 755388272), INT32_C( 1179866335), INT32_C(-1865842652), INT32_C(-1669320356)),
7423 simde_mm_set_epi64x(INT64_C(-6737308052137237000), INT64_C( 3)),
7424 simde_mm_set_epi32(INT32_C( 94423534), INT32_C( 147483291), INT32_C( 303640580), INT32_C( 328205867)) },
7425 { simde_mm_set_epi32(INT32_C(-1991314121), INT32_C( 751511241), INT32_C(-1270034962), INT32_C( -300357633)),
7426 simde_mm_set_epi64x(INT64_C(-2450775638354168945), INT64_C( 3)),
7427 simde_mm_set_epi32(INT32_C( 287956646), INT32_C( 93938905), INT32_C( 378116541), INT32_C( 499326207)) }
7428 };
7429
7430 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7431 simde__m128i r = simde_mm_srl_epi32(test_vec[i].a, test_vec[i].count);
7432 simde_assert_m128i_i32(r, ==, test_vec[i].r);
7433 }
7434
7435 return 0;
7436 }
7437
7438 static int
7439 test_simde_mm_srl_epi64(SIMDE_MUNIT_TEST_ARGS) {
7440 const struct {
7441 simde__m128i a;
7442 simde__m128i count;
7443 simde__m128i r;
7444 } test_vec[8] = {
7445 { simde_mm_set_epi64x(INT64_C( 406793501195693531), INT64_C(-6918709284570423965)),
7446 simde_mm_set_epi64x(INT64_C( 4678230141678036905), INT64_C( 4)),
7447 simde_mm_set_epi64x(INT64_C( 25424593824730845), INT64_C( 720502174321195478)) },
7448 { simde_mm_set_epi64x(INT64_C( 277670893274960143), INT64_C( 4274684903451806593)),
7449 simde_mm_set_epi64x(INT64_C( 234386534661459961), INT64_C( 2)),
7450 simde_mm_set_epi64x(INT64_C( 69417723318740035), INT64_C( 1068671225862951648)) },
7451 { simde_mm_set_epi64x(INT64_C(-6726658248624717976), INT64_C( 577839631285416277)),
7452 simde_mm_set_epi64x(INT64_C( 8276161762185938564), INT64_C( 7)),
7453 simde_mm_set_epi64x(INT64_C( 91563170508475262), INT64_C( 4514372119417314)) },
7454 { simde_mm_set_epi64x(INT64_C(-5492184873618876565), INT64_C( 1344077644647636640)),
7455 simde_mm_set_epi64x(INT64_C( 1101003055866698034), INT64_C( 6)),
7456 simde_mm_set_epi64x(INT64_C( 202414987501416797), INT64_C( 21001213197619322)) },
7457 { simde_mm_set_epi64x(INT64_C( 2798551186010511102), INT64_C(-5049322973328296066)),
7458 simde_mm_set_epi64x(INT64_C( 735258903315099979), INT64_C( 1)),
7459 simde_mm_set_epi64x(INT64_C( 1399275593005255551), INT64_C( 6698710550190627775)) },
7460 { simde_mm_set_epi64x(INT64_C( 1589994297298459509), INT64_C(-7940471350808640863)),
7461 simde_mm_set_epi64x(INT64_C( 4870695400140482879), INT64_C(13)),
7462 simde_mm_set_epi64x(INT64_C( 194091100744440), INT64_C( 1282503994494740)) },
7463 { simde_mm_set_epi64x(INT64_C( 3244367925201818847), INT64_C(-8013733167196262052)),
7464 simde_mm_set_epi64x(INT64_C(-6737308052137237000), INT64_C( 3)),
7465 simde_mm_set_epi64x(INT64_C( 405545990650227355), INT64_C( 1304126363314161195)) },
7466 { simde_mm_set_epi64x(INT64_C(-8552629025006475575), INT64_C(-5454758622571993089)),
7467 simde_mm_set_epi64x(INT64_C(-2450775638354168945), INT64_C( 3)),
7468 simde_mm_set_epi64x(INT64_C( 1236764381087884505), INT64_C( 1623998181392194815)) }
7469 };
7470
7471 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7472 simde__m128i r = simde_mm_srl_epi64(test_vec[i].a, test_vec[i].count);
7473 simde_assert_m128i_i64(r, ==, test_vec[i].r);
7474 }
7475
7476 return 0;
7477 }
7478
7479 static int
7480 test_simde_mm_sra_epi32(SIMDE_MUNIT_TEST_ARGS) {
7481 const struct {
7482 simde__m128i a;
7483 simde__m128i count;
7484 simde__m128i r;
7485 } test_vec[] = {
7486 { simde_mm_set_epi32( -561103335, 1276754862, 1749589432, 517536789),
7487 simde_mm_set_epi64x(0, 3),
7488 simde_mm_set_epi32( -70137917, 159594357, 218698679, 64692098) },
7489 { simde_mm_set_epi32( -159892315, -1509631224, -1642880399, 1227124763),
7490 simde_mm_set_epi64x(0, 31),
7491 simde_mm_set_epi32( -1, -1, -1, 0) },
7492 { simde_mm_set_epi32(-1747665335, -1727232090, -1061986990, -1651964431),
7493 simde_mm_set_epi64x(0, 21),
7494 simde_mm_set_epi32( -834, -824, -507, -788) },
7495 { simde_mm_set_epi32( -43034101, 1748997429, -1014034292, -471404994),
7496 simde_mm_set_epi64x(0, 23),
7497 simde_mm_set_epi32( -6, 208, -121, -57) },
7498 { simde_mm_set_epi32( 663988211, 279391652, 930358665, 693100359),
7499 simde_mm_set_epi64x(0, 31),
7500 simde_mm_set_epi32( 0, 0, 0, 0) },
7501 { simde_mm_set_epi32( 1596760027, -525985264, -1328341949, -1278585249),
7502 simde_mm_set_epi64x(0, 30),
7503 simde_mm_set_epi32( 1, -1, -2, -2) },
7504 { simde_mm_set_epi32( 2099244913, -668946691, -1425692748, 1445785661),
7505 simde_mm_set_epi64x(0, 19),
7506 simde_mm_set_epi32( 4003, -1276, -2720, 2757) },
7507 { simde_mm_set_epi32( -572539662, 1511976084, -2125946535, -1043884202),
7508 simde_mm_set_epi64x(0, 4),
7509 simde_mm_set_epi32( -35783729, 94498505, -132871659, -65242763) }
7510 };
7511
7512 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7513 simde__m128i r = simde_mm_sra_epi32(test_vec[i].a, test_vec[i].count);
7514 simde_assert_m128i_i32(r, ==, test_vec[i].r);
7515 }
7516
7517 return 0;
7518 }
7519
7520 static int
7521 test_simde_mm_srai_epi16(SIMDE_MUNIT_TEST_ARGS) {
7522 simde__m128i a, e, r;
7523
7524 a = simde_mm_set_epi16(INT16_C( 11440), INT16_C( 15930), INT16_C( -6862), INT16_C(-12095),
7525 INT16_C( 2973), INT16_C(-25395), INT16_C(-12983), INT16_C(-25536));
7526
7527 e = simde_mm_set_epi16(INT16_C( 11440), INT16_C( 15930), INT16_C( -6862), INT16_C(-12095),
7528 INT16_C( 2973), INT16_C(-25395), INT16_C(-12983), INT16_C(-25536));
7529 r = simde_mm_srai_epi16(a, 0);
7530 simde_assert_m128i_i16(r, ==, e);
7531
7532 e = simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1),
7533 INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1));
7534 r = simde_mm_srai_epi16(a, 16);
7535 simde_assert_m128i_i16(r, ==, e);
7536
7537 e = simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1),
7538 INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1));
7539 r = simde_mm_srai_epi16(a, 42);
7540 simde_assert_m128i_i16(r, ==, e);
7541
7542 e = simde_mm_set_epi16(INT16_C( 89), INT16_C( 124), INT16_C( -54), INT16_C( -95),
7543 INT16_C( 23), INT16_C( -199), INT16_C( -102), INT16_C( -200));
7544 r = simde_mm_srai_epi16(a, 7);
7545 simde_assert_m128i_i16(r, ==, e);
7546
7547 e = simde_mm_set_epi16(INT16_C( 1), INT16_C( 1), INT16_C( -1), INT16_C( -2),
7548 INT16_C( 0), INT16_C( -4), INT16_C( -2), INT16_C( -4));
7549 r = simde_mm_srai_epi16(a, 13);
7550 simde_assert_m128i_i16(r, ==, e);
7551
7552 #if 0
7553 e = simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1),
7554 INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1));
7555 r = simde_mm_srai_epi16(a, -7);
7556 simde_assert_m128i_i16(r, ==, e);
7557
7558 e = simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1),
7559 INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1));
7560 r = simde_mm_srai_epi16(a, -42);
7561 simde_assert_m128i_i16(r, ==, e);
7562
7563 e = simde_mm_set_epi16(INT16_C( 0), INT16_C( 0), INT16_C( -1), INT16_C( -1),
7564 INT16_C( 0), INT16_C( -1), INT16_C( -1), INT16_C( -1));
7565 r = simde_mm_srai_epi16(a, 1729);
7566 simde_assert_m128i_i16(r, ==, e);
7567 #endif
7568
7569 return 0;
7570 }
7571
7572 static int
7573 test_simde_mm_srai_epi32(SIMDE_MUNIT_TEST_ARGS) {
7574 simde__m128i a, e, r;
7575
7576 a = simde_mm_set_epi32(INT32_C(-1377123590), INT32_C( 1981969037), INT32_C( 1025592994), INT32_C( 1213959767));
7577
7578 e = simde_mm_set_epi32(INT32_C(-1377123590), INT32_C( 1981969037), INT32_C( 1025592994), INT32_C( 1213959767));
7579 r = simde_mm_srai_epi32(a, 0);
7580 simde_assert_m128i_i16(r, ==, e);
7581
7582 e = simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0));
7583 r = simde_mm_srai_epi32(a, 32);
7584 simde_assert_m128i_i16(r, ==, e);
7585
7586 e = simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0));
7587 r = simde_mm_srai_epi32(a, 42);
7588 simde_assert_m128i_i16(r, ==, e);
7589
7590 e = simde_mm_set_epi32(INT32_C( -10758779), INT32_C( 15484133), INT32_C( 8012445), INT32_C( 9484060));
7591 r = simde_mm_srai_epi32(a, 7);
7592 simde_assert_m128i_i16(r, ==, e);
7593
7594 e = simde_mm_set_epi32(INT32_C( -168106), INT32_C( 241939), INT32_C( 125194), INT32_C( 148188));
7595 r = simde_mm_srai_epi32(a, 13);
7596 simde_assert_m128i_i16(r, ==, e);
7597
7598 #if 0
7599 e = simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0));
7600 r = simde_mm_srai_epi32(a, -7);
7601 simde_assert_m128i_i16(r, ==, e);
7602
7603 e = simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0));
7604 r = simde_mm_srai_epi32(a, -42);
7605 simde_assert_m128i_i16(r, ==, e);
7606
7607 e = simde_mm_set_epi32(INT32_C( -1), INT32_C( 0), INT32_C( 0), INT32_C( 0));
7608 r = simde_mm_srai_epi32(a, 1729);
7609 simde_assert_m128i_i16(r, ==, e);
7610 #endif
7611
7612 return 0;
7613 }
7614
7615 static int
7616 test_simde_mm_slli_epi16(SIMDE_MUNIT_TEST_ARGS) {
7617 const struct {
7618 simde__m128i a;
7619 simde__m128i r;
7620 } test_vec[8] = {
7621 { simde_mm_set_epi16(INT16_C(-29640), INT16_C( 27486), INT16_C(-30681), INT16_C( 22606),
7622 INT16_C(-21221), INT16_C( 7042), INT16_C( -7099), INT16_C(-13884)),
7623 simde_mm_set_epi16(INT16_C( 25024), INT16_C( 23280), INT16_C( 16696), INT16_C(-15760),
7624 INT16_C( 26840), INT16_C( -9200), INT16_C( 8744), INT16_C( 20000)) },
7625 { simde_mm_set_epi16(INT16_C( 15230), INT16_C( 23269), INT16_C(-21546), INT16_C( 15633),
7626 INT16_C( 9645), INT16_C(-32001), INT16_C( -1446), INT16_C( -7049)),
7627 simde_mm_set_epi16(INT16_C( -9232), INT16_C(-10456), INT16_C( 24240), INT16_C( -6008),
7628 INT16_C( 11624), INT16_C( 6136), INT16_C(-11568), INT16_C( 9144)) },
7629 { simde_mm_set_epi16(INT16_C( -4964), INT16_C( 29371), INT16_C( -7375), INT16_C( 7185),
7630 INT16_C(-25257), INT16_C( 29335), INT16_C( 15023), INT16_C( 23258)),
7631 simde_mm_set_epi16(INT16_C( 25824), INT16_C(-27176), INT16_C( 6536), INT16_C( -8056),
7632 INT16_C( -5448), INT16_C(-27464), INT16_C(-10888), INT16_C(-10544)) },
7633 { simde_mm_set_epi16(INT16_C(-29984), INT16_C(-17481), INT16_C(-31241), INT16_C( 11397),
7634 INT16_C( 2926), INT16_C(-28904), INT16_C(-20560), INT16_C(-32448)),
7635 simde_mm_set_epi16(INT16_C( 22272), INT16_C( -8776), INT16_C( 12216), INT16_C( 25640),
7636 INT16_C( 23408), INT16_C( 30912), INT16_C( 32128), INT16_C( 2560)) },
7637 { simde_mm_set_epi16(INT16_C(-18879), INT16_C( 5889), INT16_C(-27972), INT16_C( -4500),
7638 INT16_C(-12683), INT16_C( 25849), INT16_C( 24809), INT16_C( 26782)),
7639 simde_mm_set_epi16(INT16_C(-19960), INT16_C(-18424), INT16_C(-27168), INT16_C( 29536),
7640 INT16_C( 29608), INT16_C( 10184), INT16_C( 1864), INT16_C( 17648)) },
7641 { simde_mm_set_epi16(INT16_C(-12553), INT16_C(-22953), INT16_C( 21946), INT16_C( -9017),
7642 INT16_C(-10462), INT16_C( -7608), INT16_C( 26015), INT16_C(-24893)),
7643 simde_mm_set_epi16(INT16_C( 30648), INT16_C( 12984), INT16_C(-21040), INT16_C( -6600),
7644 INT16_C(-18160), INT16_C( 4672), INT16_C( 11512), INT16_C( -2536)) },
7645 { simde_mm_set_epi16(INT16_C( 23545), INT16_C( -728), INT16_C( 17963), INT16_C(-24889),
7646 INT16_C( 18443), INT16_C( 19433), INT16_C(-18886), INT16_C(-28120)),
7647 simde_mm_set_epi16(INT16_C( -8248), INT16_C( -5824), INT16_C( 12632), INT16_C( -2504),
7648 INT16_C( 16472), INT16_C( 24392), INT16_C(-20016), INT16_C(-28352)) },
7649 { simde_mm_set_epi16(INT16_C( 1885), INT16_C(-18948), INT16_C(-21057), INT16_C( 636),
7650 INT16_C( -9667), INT16_C(-20298), INT16_C( 25111), INT16_C( 30554)),
7651 simde_mm_set_epi16(INT16_C( 15080), INT16_C(-20512), INT16_C( 28152), INT16_C( 5088),
7652 INT16_C(-11800), INT16_C(-31312), INT16_C( 4280), INT16_C(-17712)) }
7653 };
7654
7655 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7656 simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0));
7657
7658 simde__m128i r = simde_mm_slli_epi16(test_vec[i].a, 3);
7659 simde_assert_m128i_i16(r, ==, test_vec[i].r);
7660
7661 r = simde_mm_slli_epi16(test_vec[i].a, 0);
7662 simde_assert_m128i_i16(r, ==, test_vec[i].a);
7663
7664 r = simde_mm_slli_epi16(test_vec[i].a, 32);
7665 simde_assert_m128i_i16(r, ==, zeros);
7666
7667 r = simde_mm_slli_epi16(test_vec[i].a, 33);
7668 simde_assert_m128i_i16(r, ==, zeros);
7669 }
7670
7671 return 0;
7672 }
7673
7674 static int
7675 test_simde_mm_srli_epi16(SIMDE_MUNIT_TEST_ARGS) {
7676 const struct {
7677 simde__m128i a;
7678 simde__m128i r;
7679 } test_vec[8] = {
7680 { simde_mm_set_epi16(INT16_C(-29640), INT16_C( 27486), INT16_C(-30681), INT16_C( 22606),
7681 INT16_C(-21221), INT16_C( 7042), INT16_C( -7099), INT16_C(-13884)),
7682 simde_mm_set_epi16(INT16_C( 4487), INT16_C( 3435), INT16_C( 4356), INT16_C( 2825),
7683 INT16_C( 5539), INT16_C( 880), INT16_C( 7304), INT16_C( 6456)) },
7684 { simde_mm_set_epi16(INT16_C( 15230), INT16_C( 23269), INT16_C(-21546), INT16_C( 15633),
7685 INT16_C( 9645), INT16_C(-32001), INT16_C( -1446), INT16_C( -7049)),
7686 simde_mm_set_epi16(INT16_C( 1903), INT16_C( 2908), INT16_C( 5498), INT16_C( 1954),
7687 INT16_C( 1205), INT16_C( 4191), INT16_C( 8011), INT16_C( 7310)) },
7688 { simde_mm_set_epi16(INT16_C( -4964), INT16_C( 29371), INT16_C( -7375), INT16_C( 7185),
7689 INT16_C(-25257), INT16_C( 29335), INT16_C( 15023), INT16_C( 23258)),
7690 simde_mm_set_epi16(INT16_C( 7571), INT16_C( 3671), INT16_C( 7270), INT16_C( 898),
7691 INT16_C( 5034), INT16_C( 3666), INT16_C( 1877), INT16_C( 2907)) },
7692 { simde_mm_set_epi16(INT16_C(-29984), INT16_C(-17481), INT16_C(-31241), INT16_C( 11397),
7693 INT16_C( 2926), INT16_C(-28904), INT16_C(-20560), INT16_C(-32448)),
7694 simde_mm_set_epi16(INT16_C( 4444), INT16_C( 6006), INT16_C( 4286), INT16_C( 1424),
7695 INT16_C( 365), INT16_C( 4579), INT16_C( 5622), INT16_C( 4136)) },
7696 { simde_mm_set_epi16(INT16_C(-18879), INT16_C( 5889), INT16_C(-27972), INT16_C( -4500),
7697 INT16_C(-12683), INT16_C( 25849), INT16_C( 24809), INT16_C( 26782)),
7698 simde_mm_set_epi16(INT16_C( 5832), INT16_C( 736), INT16_C( 4695), INT16_C( 7629),
7699 INT16_C( 6606), INT16_C( 3231), INT16_C( 3101), INT16_C( 3347)) },
7700 { simde_mm_set_epi16(INT16_C(-12553), INT16_C(-22953), INT16_C( 21946), INT16_C( -9017),
7701 INT16_C(-10462), INT16_C( -7608), INT16_C( 26015), INT16_C(-24893)),
7702 simde_mm_set_epi16(INT16_C( 6622), INT16_C( 5322), INT16_C( 2743), INT16_C( 7064),
7703 INT16_C( 6884), INT16_C( 7241), INT16_C( 3251), INT16_C( 5080)) },
7704 { simde_mm_set_epi16(INT16_C( 23545), INT16_C( -728), INT16_C( 17963), INT16_C(-24889),
7705 INT16_C( 18443), INT16_C( 19433), INT16_C(-18886), INT16_C(-28120)),
7706 simde_mm_set_epi16(INT16_C( 2943), INT16_C( 8101), INT16_C( 2245), INT16_C( 5080),
7707 INT16_C( 2305), INT16_C( 2429), INT16_C( 5831), INT16_C( 4677)) },
7708 { simde_mm_set_epi16(INT16_C( 1885), INT16_C(-18948), INT16_C(-21057), INT16_C( 636),
7709 INT16_C( -9667), INT16_C(-20298), INT16_C( 25111), INT16_C( 30554)),
7710 simde_mm_set_epi16(INT16_C( 235), INT16_C( 5823), INT16_C( 5559), INT16_C( 79),
7711 INT16_C( 6983), INT16_C( 5654), INT16_C( 3138), INT16_C( 3819)) }
7712 };
7713
7714 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7715 simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0));
7716
7717 simde__m128i r = simde_mm_srli_epi16(test_vec[i].a, 3);
7718 simde_assert_m128i_i16(r, ==, test_vec[i].r);
7719
7720 r = simde_mm_srli_epi16(test_vec[i].a, 0);
7721 simde_assert_m128i_i16(r, ==, test_vec[i].a);
7722
7723 r = simde_mm_srli_epi16(test_vec[i].a, 16);
7724 simde_assert_m128i_i16(r, ==, zeros);
7725
7726 r = simde_mm_srli_epi16(test_vec[i].a, 17);
7727 simde_assert_m128i_i16(r, ==, zeros);
7728 }
7729
7730 return 0;
7731 }
7732
7733 static int
7734 test_simde_mm_slli_epi32(SIMDE_MUNIT_TEST_ARGS) {
7735 const struct {
7736 simde__m128i a;
7737 simde__m128i r;
7738 } test_vec[8] = {
7739 { simde_mm_set_epi32(-1285208672, 1618695439, -1484382898, 97979804),
7740 simde_mm_set_epi32( 1822995456, 258646496, -255612480, -1159613568) },
7741 { simde_mm_set_epi32( -215274446, -1750972712, -2134111648, -338295419),
7742 simde_mm_set_epi32( 1701152320, -196551936, 427904000, 2059448480) },
7743 { simde_mm_set_epi32( 406577052, -1272707531, -128013424, 1090211344),
7744 simde_mm_set_epi32( 125563776, -2071935328, 198537728, 527024640) },
7745 { simde_mm_set_epi32( -547315834, 386023226, 789460810, -2097507270),
7746 simde_mm_set_epi32( -334237504, -532158656, -507057856, 1599244096) },
7747 { simde_mm_set_epi32( 1943314584, 126005183, 199695502, -1431967820),
7748 simde_mm_set_epi32( 2056524544, -262801440, 2095288768, 1421670016) },
7749 { simde_mm_set_epi32( 1899687789, -1162493730, 1537811436, 825134965),
7750 simde_mm_set_epi32( 660467104, 1454906304, 1965325696, 634515104) },
7751 { simde_mm_set_epi32(-1452393292, -1781210226, -1307434085, -2039047771),
7752 simde_mm_set_epi32( 768054912, -1164152384, 1111782240, -825019232) },
7753 { simde_mm_set_epi32(-1646930836, 816193989, -1662050152, 347461227),
7754 simde_mm_set_epi32(-1162179200, 348403872, -1645997312, -1766142624) }
7755 };
7756
7757 static const struct {
7758 const int32_t a[4];
7759 const int32_t r[4];
7760 } test_vec_18[] = {
7761 { { INT32_C( 2018447505), INT32_C( 2072485070), -INT32_C( 1063800373), INT32_C( 1619529499) },
7762 { INT32_C( 1111752704), INT32_C( 1933049856), -INT32_C( 953417728), INT32_C( 1013710848) } },
7763 { { INT32_C( 1312528525), -INT32_C( 1886008265), INT32_C( 615191858), INT32_C( 1445629892) },
7764 { INT32_C( 1647575040), INT32_C( 819724288), INT32_C( 1422393344), INT32_C( 1058013184) } },
7765 { { INT32_C( 1842248351), -INT32_C( 504867562), INT32_C( 564232198), INT32_C( 495004047) },
7766 { -INT32_C( 360972288), INT32_C( 1415053312), INT32_C( 1572864), -INT32_C( 1506017280) } },
7767 { { -INT32_C( 127157055), -INT32_C( 1148780408), -INT32_C( 622906602), INT32_C( 1630538178) },
7768 { -INT32_C( 217841664), INT32_C( 35651584), -INT32_C( 866648064), INT32_C( 654835712) } },
7769 { { -INT32_C( 1714487421), INT32_C( 1534834260), -INT32_C( 964944842), INT32_C( 132382278) },
7770 { -INT32_C( 32768000), -INT32_C( 649068544), INT32_C( 1893203968), -INT32_C( 115867648) } },
7771 { { INT32_C( 1124093626), INT32_C( 1711179599), INT32_C( 2084560314), INT32_C( 1792897254) },
7772 { INT32_C( 988282880), INT32_C( 490471424), INT32_C( 1994915840), -INT32_C( 1013448704) } },
7773 { { -INT32_C( 1023169681), -INT32_C( 1742832030), -INT32_C( 513893477), INT32_C( 1407730073) },
7774 { -INT32_C( 1380188160), -INT32_C( 108527616), INT32_C( 1852571648), INT32_C( 107216896) } },
7775 { { -INT32_C( 543758192), INT32_C( 709137520), INT32_C( 1487373169), INT32_C( 1656915187) },
7776 { -INT32_C( 1572864000), INT32_C( 1371537408), INT32_C( 230948864), INT32_C( 332136448) } },
7777 };
7778
7779 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7780 simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0));
7781
7782 simde__m128i r = simde_mm_slli_epi32(test_vec[i].a, 5);
7783 simde_assert_m128i_i32(r, ==, test_vec[i].r);
7784
7785 r = simde_mm_slli_epi32(test_vec[i].a, 0);
7786 simde_assert_m128i_i32(r, ==, test_vec[i].a);
7787
7788 r = simde_mm_slli_epi32(test_vec[i].a, 32);
7789 simde_assert_m128i_i32(r, ==, zeros);
7790
7791 r = simde_mm_slli_epi32(test_vec[i].a, 33);
7792 simde_assert_m128i_i32(r, ==, zeros);
7793 }
7794
7795 for (size_t i = 0 ; i < (sizeof(test_vec_18) / sizeof(test_vec_18[0])) ; i++) {
7796 simde__m128i a = simde_x_mm_loadu_epi32(test_vec_18[i].a);
7797 simde__m128i r = simde_mm_slli_epi32(a, 18);
7798 simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec_18[i].r));
7799 }
7800
7801 return 0;
7802 }
7803
7804 static int
7805 test_simde_mm_srli_epi32(SIMDE_MUNIT_TEST_ARGS) {
7806 const struct {
7807 simde__m128i a;
7808 simde__m128i r;
7809 } test_vec[8] = {
7810 { simde_mm_set_epi32( 71624167, 617025209, -286267780, -1151099730),
7811 simde_mm_set_epi32( 2238255, 19282037, 125271859, 98245861) },
7812 { simde_mm_set_epi32(-1660949423, 45505817, 1892774959, -917815961),
7813 simde_mm_set_epi32( 82313058, 1422056, 59149217, 105535979) },
7814 { simde_mm_set_epi32( 1642659615, -757986143, -1891097222, 940303240),
7815 simde_mm_set_epi32( 51333112, 110530661, 75120939, 29384476) },
7816 { simde_mm_set_epi32( 1761409447, 115333600, -589319110, -1530115830),
7817 simde_mm_set_epi32( 55044045, 3604175, 115801505, 86401608) },
7818 { simde_mm_set_epi32( -502944468, -1500485927, 32222499, 1115657749),
7819 simde_mm_set_epi32( 118500713, 87327542, 1006953, 34864304) },
7820 { simde_mm_set_epi32( -545012251, 924477372, -1883097200, 1327167226),
7821 simde_mm_set_epi32( 117186095, 28889917, 75370940, 41473975) },
7822 { simde_mm_set_epi32( 995448668, 377764585, -1462273550, 1306007963),
7823 simde_mm_set_epi32( 31107770, 11805143, 88521679, 40812748) },
7824 { simde_mm_set_epi32( 1991954175, 665906947, -606406775, 1678465696),
7825 simde_mm_set_epi32( 62248567, 20809592, 115267516, 52452053) }
7826 };
7827
7828 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7829 simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0));
7830
7831 simde__m128i r = simde_mm_srli_epi32(test_vec[i].a, 5);
7832 simde_assert_m128i_i32(r, ==, test_vec[i].r);
7833
7834 r = simde_mm_srli_epi32(test_vec[i].a, 0);
7835 simde_assert_m128i_i32(r, ==, test_vec[i].a);
7836
7837 r = simde_mm_srli_epi32(test_vec[i].a, 64);
7838 simde_assert_m128i_i32(r, ==, zeros);
7839
7840 r = simde_mm_srli_epi32(test_vec[i].a, 65);
7841 simde_assert_m128i_i32(r, ==, zeros);
7842 }
7843
7844 static const struct {
7845 const int32_t a[4];
7846 const int32_t r[4];
7847 } test_vec_18[] = {
7848 { { -INT32_C( 1359328745), -INT32_C( 408445706), INT32_C( 239121880), INT32_C( 748205077) },
7849 { INT32_C( 11198), INT32_C( 14825), INT32_C( 912), INT32_C( 2854) } },
7850 { { -INT32_C( 345859164), INT32_C( 1010393205), INT32_C( 1843309992), -INT32_C( 446698290) },
7851 { INT32_C( 15064), INT32_C( 3854), INT32_C( 7031), INT32_C( 14679) } },
7852 { { INT32_C( 764631350), -INT32_C( 837534730), INT32_C( 98325744), -INT32_C( 1405979384) },
7853 { INT32_C( 2916), INT32_C( 13189), INT32_C( 375), INT32_C( 11020) } },
7854 { { -INT32_C( 2053663728), -INT32_C( 1648176907), INT32_C( 1275764862), -INT32_C( 1020106099) },
7855 { INT32_C( 8549), INT32_C( 10096), INT32_C( 4866), INT32_C( 12492) } },
7856 { { -INT32_C( 1175403069), -INT32_C( 259586816), INT32_C( 1660314713), -INT32_C( 384948007) },
7857 { INT32_C( 11900), INT32_C( 15393), INT32_C( 6333), INT32_C( 14915) } },
7858 { { -INT32_C( 1318148420), -INT32_C( 196136842), INT32_C( 1581341137), -INT32_C( 2027850813) },
7859 { INT32_C( 11355), INT32_C( 15635), INT32_C( 6032), INT32_C( 8648) } },
7860 { { INT32_C( 960500280), INT32_C( 1881786391), INT32_C( 97656620), INT32_C( 82764103) },
7861 { INT32_C( 3664), INT32_C( 7178), INT32_C( 372), INT32_C( 315) } },
7862 { { -INT32_C( 38445945), INT32_C( 1592919181), INT32_C( 565982046), -INT32_C( 559358554) },
7863 { INT32_C( 16237), INT32_C( 6076), INT32_C( 2159), INT32_C( 14250) } },
7864 };
7865
7866 for (size_t i = 0 ; i < (sizeof(test_vec_18) / sizeof(test_vec_18[0])) ; i++) {
7867 simde__m128i a = simde_x_mm_loadu_epi32(test_vec_18[i].a);
7868 simde__m128i r = simde_mm_srli_epi32(a, 18);
7869 simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec_18[i].r));
7870 }
7871
7872 return 0;
7873 }
7874
7875 static int
7876 test_simde_mm_slli_epi64(SIMDE_MUNIT_TEST_ARGS) {
7877 const struct {
7878 simde__m128i a;
7879 simde__m128i r;
7880 } test_vec[8] = {
7881 { simde_mm_set_epi64x(INT64_C(-2315072815474662386), INT64_C( 6072154117607221746)),
7882 simde_mm_set_epi64x(INT64_C(-1181415201403959552), INT64_C( 2472475957923215616)) },
7883 { simde_mm_set_epi64x(INT64_C(-2150345518249743204), INT64_C( 3180241355952247476)),
7884 simde_mm_set_epi64x(INT64_C( 1456934769676144128), INT64_C( 1242523940277541376)) },
7885 { simde_mm_set_epi64x(INT64_C( 6492638483912689614), INT64_C( 7722522576063149658)),
7886 simde_mm_set_epi64x(INT64_C( 954242623894447872), INT64_C(-7641290244232631040)) },
7887 { simde_mm_set_epi64x(INT64_C(-3437371876454060839), INT64_C(-7472017034411611746)),
7888 simde_mm_set_epi64x(INT64_C( 2738257582909451392), INT64_C( 2812511428210380544)) },
7889 { simde_mm_set_epi64x(INT64_C(-2994435188669454779), INT64_C(-4713226846452985822)),
7890 simde_mm_set_epi64x(INT64_C( 4093921398210372224), INT64_C( 5449518086433018112)) },
7891 { simde_mm_set_epi64x(INT64_C(-1348831542752523511), INT64_C(-2794326256527200530)),
7892 simde_mm_set_epi64x(INT64_C(-6629740808937044864), INT64_C(-7185623435000187136)) },
7893 { simde_mm_set_epi64x(INT64_C(-5602242705933140185), INT64_C( 6492190109232091873)),
7894 simde_mm_set_epi64x(INT64_C( 2335952515230569344), INT64_C( 896850664777937024)) },
7895 { simde_mm_set_epi64x(INT64_C( 8003331601608352009), INT64_C(-5520322068937257120)),
7896 simde_mm_set_epi64x(INT64_C(-8591223121865833344), INT64_C(-5624950023005949952)) }
7897 };
7898
7899 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7900 simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0));
7901
7902 simde__m128i r = simde_mm_slli_epi64(test_vec[i].a, 7);
7903 simde_assert_m128i_i32(r, ==, test_vec[i].r);
7904
7905 r = simde_mm_slli_epi64(test_vec[i].a, 0);
7906 simde_assert_m128i_i32(r, ==, test_vec[i].a);
7907
7908 r = simde_mm_slli_epi64(test_vec[i].a, 64);
7909 simde_assert_m128i_i32(r, ==, zeros);
7910
7911 r = simde_mm_slli_epi64(test_vec[i].a, 65);
7912 simde_assert_m128i_i32(r, ==, zeros);
7913 }
7914
7915 return 0;
7916 }
7917
7918 static int
7919 test_simde_mm_srli_epi64(SIMDE_MUNIT_TEST_ARGS) {
7920 const struct {
7921 simde__m128i a;
7922 simde__m128i r;
7923 } test_vec[8] = {
7924 { simde_mm_set_epi64x(INT64_C(-2315072815474662386), INT64_C( 6072154117607221746)),
7925 simde_mm_set_epi64x(INT64_C( 126028681704960072), INT64_C( 47438704043806419)) },
7926 { simde_mm_set_epi64x(INT64_C(-2150345518249743204), INT64_C( 3180241355952247476)),
7927 simde_mm_set_epi64x(INT64_C( 127315613714529753), INT64_C( 24845635593376933)) },
7928 { simde_mm_set_epi64x(INT64_C( 6492638483912689614), INT64_C( 7722522576063149658)),
7929 simde_mm_set_epi64x(INT64_C( 50723738155567887), INT64_C( 60332207625493356)) },
7930 { simde_mm_set_epi64x(INT64_C(-3437371876454060839), INT64_C(-7472017034411611746)),
7931 simde_mm_set_epi64x(INT64_C( 117260720291058521), INT64_C( 85740054994515155)) },
7932 { simde_mm_set_epi64x(INT64_C(-2994435188669454779), INT64_C(-4713226846452985822)),
7933 simde_mm_set_epi64x(INT64_C( 120721163164375756), INT64_C( 107293103337941920)) },
7934 { simde_mm_set_epi64x(INT64_C(-1348831542752523511), INT64_C(-2794326256527200530)),
7935 simde_mm_set_epi64x(INT64_C( 133577441648101782), INT64_C( 122284514196737117)) },
7936 { simde_mm_set_epi64x(INT64_C(-5602242705933140185), INT64_C( 6492190109232091873)),
7937 simde_mm_set_epi64x(INT64_C( 100347666935753214), INT64_C( 50720235228375717)) },
7938 { simde_mm_set_epi64x(INT64_C( 8003331601608352009), INT64_C(-5520322068937257120)),
7939 simde_mm_set_epi64x(INT64_C( 62526028137565250), INT64_C( 100987671912283550)) }
7940 };
7941
7942 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7943 simde__m128i r;
7944 simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0));
7945
7946 /* r = simde_mm_srli_epi64(test_vec[i].a, -1); */
7947 /* simde_assert_m128i_i32(r, ==, zeros); */
7948
7949 r = simde_mm_srli_epi64(test_vec[i].a, 0);
7950 simde_assert_m128i_i32(r, ==, test_vec[i].a);
7951
7952 r = simde_mm_srli_epi64(test_vec[i].a, 7);
7953 simde_assert_m128i_i32(r, ==, test_vec[i].r);
7954
7955 r = simde_mm_srli_epi64(test_vec[i].a, 64);
7956 simde_assert_m128i_i32(r, ==, zeros);
7957
7958 r = simde_mm_srli_epi64(test_vec[i].a, 65);
7959 simde_assert_m128i_i32(r, ==, zeros);
7960 }
7961
7962 return 0;
7963 }
7964
7965 static int
7966 test_simde_mm_store_pd(SIMDE_MUNIT_TEST_ARGS) {
7967 const struct {
7968 simde__m128d a;
7969 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
7970 } test_vec[8] = {
7971 { simde_mm_set_pd(SIMDE_FLOAT64_C( 825.31), SIMDE_FLOAT64_C( 176.75)),
7972 {SIMDE_FLOAT64_C( 176.75), SIMDE_FLOAT64_C( 825.31) } },
7973 { simde_mm_set_pd(SIMDE_FLOAT64_C( -248.58), SIMDE_FLOAT64_C( -171.93)),
7974 {SIMDE_FLOAT64_C( -171.93), SIMDE_FLOAT64_C( -248.58) } },
7975 { simde_mm_set_pd(SIMDE_FLOAT64_C( 419.77), SIMDE_FLOAT64_C( 712.85)),
7976 {SIMDE_FLOAT64_C( 712.85), SIMDE_FLOAT64_C( 419.77) } },
7977 { simde_mm_set_pd(SIMDE_FLOAT64_C( 785.55), SIMDE_FLOAT64_C( 78.74)),
7978 {SIMDE_FLOAT64_C( 78.74), SIMDE_FLOAT64_C( 785.55) } },
7979 { simde_mm_set_pd(SIMDE_FLOAT64_C( -963.92), SIMDE_FLOAT64_C( 614.28)),
7980 {SIMDE_FLOAT64_C( 614.28), SIMDE_FLOAT64_C( -963.92) } },
7981 { simde_mm_set_pd(SIMDE_FLOAT64_C( 624.62), SIMDE_FLOAT64_C( -260.28)),
7982 {SIMDE_FLOAT64_C( -260.28), SIMDE_FLOAT64_C( 624.62) } },
7983 { simde_mm_set_pd(SIMDE_FLOAT64_C( -178.24), SIMDE_FLOAT64_C( 945.12)),
7984 {SIMDE_FLOAT64_C( 945.12), SIMDE_FLOAT64_C( -178.24) } },
7985 { simde_mm_set_pd(SIMDE_FLOAT64_C( -271.60), SIMDE_FLOAT64_C( -674.20)),
7986 {SIMDE_FLOAT64_C( -674.20), SIMDE_FLOAT64_C( -271.60) } }
7987 };
7988
7989 for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
7990 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
7991 simde_mm_store_pd(r, test_vec[i].a);
7992 simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
7993 }
7994
7995 return 0;
7996 }
7997
7998 static int
7999 test_simde_mm_store_pd1(SIMDE_MUNIT_TEST_ARGS) {
8000 const struct {
8001 simde__m128d a;
8002 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8003 } test_vec[8] = {
8004 { simde_mm_set_pd(SIMDE_FLOAT64_C( 278.50), SIMDE_FLOAT64_C( 554.87)),
8005 {SIMDE_FLOAT64_C( 554.87), SIMDE_FLOAT64_C( 554.87) } },
8006 { simde_mm_set_pd(SIMDE_FLOAT64_C( -348.28), SIMDE_FLOAT64_C( 361.13)),
8007 {SIMDE_FLOAT64_C( 361.13), SIMDE_FLOAT64_C( 361.13) } },
8008 { simde_mm_set_pd(SIMDE_FLOAT64_C( -701.38), SIMDE_FLOAT64_C( 708.23)),
8009 {SIMDE_FLOAT64_C( 708.23), SIMDE_FLOAT64_C( 708.23) } },
8010 { simde_mm_set_pd(SIMDE_FLOAT64_C( -362.77), SIMDE_FLOAT64_C( -574.16)),
8011 {SIMDE_FLOAT64_C( -574.16), SIMDE_FLOAT64_C( -574.16) } },
8012 { simde_mm_set_pd(SIMDE_FLOAT64_C( 420.63), SIMDE_FLOAT64_C( 850.70)),
8013 {SIMDE_FLOAT64_C( 850.70), SIMDE_FLOAT64_C( 850.70) } },
8014 { simde_mm_set_pd(SIMDE_FLOAT64_C( -223.78), SIMDE_FLOAT64_C( 845.58)),
8015 {SIMDE_FLOAT64_C( 845.58), SIMDE_FLOAT64_C( 845.58) } },
8016 { simde_mm_set_pd(SIMDE_FLOAT64_C( 948.70), SIMDE_FLOAT64_C( 544.62)),
8017 {SIMDE_FLOAT64_C( 544.62), SIMDE_FLOAT64_C( 544.62) } },
8018 { simde_mm_set_pd(SIMDE_FLOAT64_C( -216.79), SIMDE_FLOAT64_C( -830.24)),
8019 {SIMDE_FLOAT64_C( -830.24), SIMDE_FLOAT64_C( -830.24) } }
8020 };
8021
8022 for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8023 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8024 simde_mm_store_pd1(r, test_vec[i].a);
8025 simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8026 }
8027
8028 return 0;
8029 }
8030
8031 static int
8032 test_simde_mm_store_sd(SIMDE_MUNIT_TEST_ARGS) {
8033 const struct {
8034 simde__m128d a;
8035 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 b[2];
8036 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8037 } test_vec[8] = {
8038 { simde_mm_set_pd(SIMDE_FLOAT64_C( -380.32), SIMDE_FLOAT64_C( 589.10)),
8039 {SIMDE_FLOAT64_C( -886.38), SIMDE_FLOAT64_C( 706.27) },
8040 {SIMDE_FLOAT64_C( 589.10), SIMDE_FLOAT64_C( 706.27) } },
8041 { simde_mm_set_pd(SIMDE_FLOAT64_C( 97.55), SIMDE_FLOAT64_C( -921.93)),
8042 {SIMDE_FLOAT64_C( 175.08), SIMDE_FLOAT64_C( -498.43) },
8043 {SIMDE_FLOAT64_C( -921.93), SIMDE_FLOAT64_C( -498.43) } },
8044 { simde_mm_set_pd(SIMDE_FLOAT64_C( -962.76), SIMDE_FLOAT64_C( -267.73)),
8045 {SIMDE_FLOAT64_C( -505.37), SIMDE_FLOAT64_C( -729.92) },
8046 {SIMDE_FLOAT64_C( -267.73), SIMDE_FLOAT64_C( -729.92) } },
8047 { simde_mm_set_pd(SIMDE_FLOAT64_C( 154.59), SIMDE_FLOAT64_C( -829.83)),
8048 {SIMDE_FLOAT64_C( 141.33), SIMDE_FLOAT64_C( 657.26) },
8049 {SIMDE_FLOAT64_C( -829.83), SIMDE_FLOAT64_C( 657.26) } },
8050 { simde_mm_set_pd(SIMDE_FLOAT64_C( -623.49), SIMDE_FLOAT64_C( -306.50)),
8051 {SIMDE_FLOAT64_C( -540.89), SIMDE_FLOAT64_C( 213.61) },
8052 {SIMDE_FLOAT64_C( -306.50), SIMDE_FLOAT64_C( 213.61) } },
8053 { simde_mm_set_pd(SIMDE_FLOAT64_C( 708.57), SIMDE_FLOAT64_C( -626.05)),
8054 {SIMDE_FLOAT64_C( -658.64), SIMDE_FLOAT64_C( 310.68) },
8055 {SIMDE_FLOAT64_C( -626.05), SIMDE_FLOAT64_C( 310.68) } },
8056 { simde_mm_set_pd(SIMDE_FLOAT64_C( 7.10), SIMDE_FLOAT64_C( 84.59)),
8057 {SIMDE_FLOAT64_C( 191.88), SIMDE_FLOAT64_C( -258.06) },
8058 {SIMDE_FLOAT64_C( 84.59), SIMDE_FLOAT64_C( -258.06) } },
8059 { simde_mm_set_pd(SIMDE_FLOAT64_C( 399.99), SIMDE_FLOAT64_C( -337.50)),
8060 {SIMDE_FLOAT64_C( 733.91), SIMDE_FLOAT64_C( -756.49) },
8061 {SIMDE_FLOAT64_C( -337.50), SIMDE_FLOAT64_C( -756.49) } }
8062 };
8063
8064 for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8065 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8066 simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b));
8067 simde_mm_store_sd(r, test_vec[i].a);
8068 simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8069 }
8070
8071 return 0;
8072 }
8073
8074 static int
8075 test_simde_mm_store_si128(SIMDE_MUNIT_TEST_ARGS) {
8076 const struct {
8077 simde__m128i a;
8078 simde__m128i r;
8079 } test_vec[8] = {
8080 { simde_mm_set_epi32(INT32_C(-1969078312), INT32_C( 1646650233), INT32_C(-1190611301), INT32_C( 889904733)),
8081 simde_mm_set_epi32(INT32_C(-1969078312), INT32_C( 1646650233), INT32_C(-1190611301), INT32_C( 889904733)) },
8082 { simde_mm_set_epi32(INT32_C( 361491951), INT32_C(-1497327260), INT32_C(-2092062445), INT32_C(-1242536811)),
8083 simde_mm_set_epi32(INT32_C( 361491951), INT32_C(-1497327260), INT32_C(-2092062445), INT32_C(-1242536811)) },
8084 { simde_mm_set_epi32(INT32_C( 790325756), INT32_C( -295457696), INT32_C( 30297459), INT32_C( 860807687)),
8085 simde_mm_set_epi32(INT32_C( 790325756), INT32_C( -295457696), INT32_C( 30297459), INT32_C( 860807687)) },
8086 { simde_mm_set_epi32(INT32_C(-1228048681), INT32_C( 1236867704), INT32_C(-1927827785), INT32_C(-1233913343)),
8087 simde_mm_set_epi32(INT32_C(-1228048681), INT32_C( 1236867704), INT32_C(-1927827785), INT32_C(-1233913343)) },
8088 { simde_mm_set_epi32(INT32_C( 1007412231), INT32_C( -296710614), INT32_C(-1416317108), INT32_C( -839008134)),
8089 simde_mm_set_epi32(INT32_C( 1007412231), INT32_C( -296710614), INT32_C(-1416317108), INT32_C( -839008134)) },
8090 { simde_mm_set_epi32(INT32_C( 1325410731), INT32_C( 2049780007), INT32_C( 190337706), INT32_C( 1948643128)),
8091 simde_mm_set_epi32(INT32_C( 1325410731), INT32_C( 2049780007), INT32_C( 190337706), INT32_C( 1948643128)) },
8092 { simde_mm_set_epi32(INT32_C(-1295145224), INT32_C( -913388140), INT32_C(-1185110338), INT32_C( 127220065)),
8093 simde_mm_set_epi32(INT32_C(-1295145224), INT32_C( -913388140), INT32_C(-1185110338), INT32_C( 127220065)) },
8094 { simde_mm_set_epi32(INT32_C( 479405479), INT32_C( 641965302), INT32_C(-1100092667), INT32_C( 1837148945)),
8095 simde_mm_set_epi32(INT32_C( 479405479), INT32_C( 641965302), INT32_C(-1100092667), INT32_C( 1837148945)) }
8096 };
8097
8098 for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8099 simde__m128i r;
8100 simde_mm_store_si128(&r, test_vec[i].a);
8101 simde_assert_m128i_i32(r, ==, test_vec[i].r);
8102 }
8103
8104 return 0;
8105 }
8106
8107 static int
8108 test_simde_mm_storeh_pd(SIMDE_MUNIT_TEST_ARGS) {
8109 const struct {
8110 simde__m128d a;
8111 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 b[2];
8112 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8113 } test_vec[8] = {
8114 { simde_mm_set_pd(SIMDE_FLOAT64_C( -154.79), SIMDE_FLOAT64_C( 689.59)),
8115 {SIMDE_FLOAT64_C( -986.30), SIMDE_FLOAT64_C( -463.82) },
8116 {SIMDE_FLOAT64_C( -154.79), SIMDE_FLOAT64_C( -463.82) } },
8117 { simde_mm_set_pd(SIMDE_FLOAT64_C( 944.07), SIMDE_FLOAT64_C( -598.47)),
8118 {SIMDE_FLOAT64_C( -514.42), SIMDE_FLOAT64_C( 652.02) },
8119 {SIMDE_FLOAT64_C( 944.07), SIMDE_FLOAT64_C( 652.02) } },
8120 { simde_mm_set_pd(SIMDE_FLOAT64_C( -34.72), SIMDE_FLOAT64_C( -771.52)),
8121 {SIMDE_FLOAT64_C( 343.91), SIMDE_FLOAT64_C( -171.75) },
8122 {SIMDE_FLOAT64_C( -34.72), SIMDE_FLOAT64_C( -171.75) } },
8123 { simde_mm_set_pd(SIMDE_FLOAT64_C( 305.40), SIMDE_FLOAT64_C( -671.87)),
8124 {SIMDE_FLOAT64_C( -579.65), SIMDE_FLOAT64_C( -985.37) },
8125 {SIMDE_FLOAT64_C( 305.40), SIMDE_FLOAT64_C( -985.37) } },
8126 { simde_mm_set_pd(SIMDE_FLOAT64_C( 173.51), SIMDE_FLOAT64_C( 643.06)),
8127 {SIMDE_FLOAT64_C( 794.84), SIMDE_FLOAT64_C( 233.08) },
8128 {SIMDE_FLOAT64_C( 173.51), SIMDE_FLOAT64_C( 233.08) } },
8129 { simde_mm_set_pd(SIMDE_FLOAT64_C( -130.21), SIMDE_FLOAT64_C( -290.59)),
8130 {SIMDE_FLOAT64_C( 584.05), SIMDE_FLOAT64_C( -167.57) },
8131 {SIMDE_FLOAT64_C( -130.21), SIMDE_FLOAT64_C( -167.57) } },
8132 { simde_mm_set_pd(SIMDE_FLOAT64_C( -759.48), SIMDE_FLOAT64_C( 428.70)),
8133 {SIMDE_FLOAT64_C( 36.98), SIMDE_FLOAT64_C( -189.97) },
8134 {SIMDE_FLOAT64_C( -759.48), SIMDE_FLOAT64_C( -189.97) } },
8135 { simde_mm_set_pd(SIMDE_FLOAT64_C( 222.49), SIMDE_FLOAT64_C( 621.71)),
8136 {SIMDE_FLOAT64_C( -467.95), SIMDE_FLOAT64_C( -910.73) },
8137 {SIMDE_FLOAT64_C( 222.49), SIMDE_FLOAT64_C( -910.73) } }
8138 };
8139
8140 for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8141 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8142 simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b));
8143 simde_mm_storeh_pd(r, test_vec[i].a);
8144 simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8145 }
8146
8147 return 0;
8148 }
8149
8150 static int
8151 test_simde_mm_storel_epi64(SIMDE_MUNIT_TEST_ARGS) {
8152 const struct {
8153 simde__m128i a;
8154 simde__m128i b;
8155 simde__m128i r;
8156 } test_vec[8] = {
8157 { simde_mm_set_epi64x(INT64_C(-8572402204481175152), INT64_C(-3565447379630862345)),
8158 simde_mm_set_epi64x(INT64_C(-5836787758646654491), INT64_C( 978262207997446536)),
8159 simde_mm_set_epi64x(INT64_C(-5836787758646654491), INT64_C(-3565447379630862345)) },
8160 { simde_mm_set_epi64x(INT64_C( 883894259135204982), INT64_C(-6785295924552521928)),
8161 simde_mm_set_epi64x(INT64_C( 5751908210058630765), INT64_C(-7999305285706001942)),
8162 simde_mm_set_epi64x(INT64_C( 5751908210058630765), INT64_C(-6785295924552521928)) },
8163 { simde_mm_set_epi64x(INT64_C( 4991496111910955453), INT64_C(-1947231678451890517)),
8164 simde_mm_set_epi64x(INT64_C( 1054715717267865334), INT64_C(-5199938312574175167)),
8165 simde_mm_set_epi64x(INT64_C( 1054715717267865334), INT64_C(-1947231678451890517)) },
8166 { simde_mm_set_epi64x(INT64_C(-6916286228894702079), INT64_C(-7888320918323423602)),
8167 simde_mm_set_epi64x(INT64_C(-4560271213984560857), INT64_C( 1030486561279856923)),
8168 simde_mm_set_epi64x(INT64_C(-4560271213984560857), INT64_C(-7888320918323423602)) },
8169 { simde_mm_set_epi64x(INT64_C(-5516402797122916761), INT64_C( 8516393373254709766)),
8170 simde_mm_set_epi64x(INT64_C(-8984432431227422893), INT64_C(-1285772213781786319)),
8171 simde_mm_set_epi64x(INT64_C(-8984432431227422893), INT64_C( 8516393373254709766)) },
8172 { simde_mm_set_epi64x(INT64_C( 1537881028582424966), INT64_C( 3855597324285413517)),
8173 simde_mm_set_epi64x(INT64_C(-1087659369158402202), INT64_C( 5504181592152866903)),
8174 simde_mm_set_epi64x(INT64_C(-1087659369158402202), INT64_C( 3855597324285413517)) },
8175 { simde_mm_set_epi64x(INT64_C(-1003754336566127903), INT64_C( 3155788073225494266)),
8176 simde_mm_set_epi64x(INT64_C( 7014294951579480267), INT64_C(-6777837266490471507)),
8177 simde_mm_set_epi64x(INT64_C( 7014294951579480267), INT64_C( 3155788073225494266)) },
8178 { simde_mm_set_epi64x(INT64_C( 7343239871058385173), INT64_C(-8089093160963830084)),
8179 simde_mm_set_epi64x(INT64_C(-7180996141698966448), INT64_C( 1747758344108352756)),
8180 simde_mm_set_epi64x(INT64_C(-7180996141698966448), INT64_C(-8089093160963830084)) }
8181 };
8182
8183 for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8184 simde__m128i r;
8185 simde_memcpy(&r, &(test_vec[i].b), sizeof(r));
8186 simde_mm_storel_epi64(&r, test_vec[i].a);
8187 simde_assert_m128i_i64(r, ==, test_vec[i].r);
8188 }
8189
8190 return 0;
8191 }
8192
8193 static int
8194 test_simde_mm_storel_pd(SIMDE_MUNIT_TEST_ARGS) {
8195 const struct {
8196 simde__m128d a;
8197 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 b[2];
8198 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8199 } test_vec[8] = {
8200 { simde_mm_set_pd(SIMDE_FLOAT64_C( -887.08), SIMDE_FLOAT64_C( -520.70)),
8201 {SIMDE_FLOAT64_C( -258.49), SIMDE_FLOAT64_C( 913.00) },
8202 {SIMDE_FLOAT64_C( -520.70), SIMDE_FLOAT64_C( 913.00) } },
8203 { simde_mm_set_pd(SIMDE_FLOAT64_C( 724.04), SIMDE_FLOAT64_C( -774.49)),
8204 {SIMDE_FLOAT64_C( 557.37), SIMDE_FLOAT64_C( -701.13) },
8205 {SIMDE_FLOAT64_C( -774.49), SIMDE_FLOAT64_C( -701.13) } },
8206 { simde_mm_set_pd(SIMDE_FLOAT64_C( -366.90), SIMDE_FLOAT64_C( -168.25)),
8207 {SIMDE_FLOAT64_C( 485.14), SIMDE_FLOAT64_C( 500.94) },
8208 {SIMDE_FLOAT64_C( -168.25), SIMDE_FLOAT64_C( 500.94) } },
8209 { simde_mm_set_pd(SIMDE_FLOAT64_C( -783.51), SIMDE_FLOAT64_C( -187.73)),
8210 {SIMDE_FLOAT64_C( -391.92), SIMDE_FLOAT64_C( -506.74) },
8211 {SIMDE_FLOAT64_C( -187.73), SIMDE_FLOAT64_C( -506.74) } },
8212 { simde_mm_set_pd(SIMDE_FLOAT64_C( -50.27), SIMDE_FLOAT64_C( -405.84)),
8213 {SIMDE_FLOAT64_C( -733.12), SIMDE_FLOAT64_C( -697.37) },
8214 {SIMDE_FLOAT64_C( -405.84), SIMDE_FLOAT64_C( -697.37) } },
8215 { simde_mm_set_pd(SIMDE_FLOAT64_C( -773.31), SIMDE_FLOAT64_C( -470.65)),
8216 {SIMDE_FLOAT64_C( 738.01), SIMDE_FLOAT64_C( -908.23) },
8217 {SIMDE_FLOAT64_C( -470.65), SIMDE_FLOAT64_C( -908.23) } },
8218 { simde_mm_set_pd(SIMDE_FLOAT64_C( -43.07), SIMDE_FLOAT64_C( -143.29)),
8219 {SIMDE_FLOAT64_C( 985.95), SIMDE_FLOAT64_C( 19.70) },
8220 {SIMDE_FLOAT64_C( -143.29), SIMDE_FLOAT64_C( 19.70) } },
8221 { simde_mm_set_pd(SIMDE_FLOAT64_C( 649.59), SIMDE_FLOAT64_C( -925.70)),
8222 {SIMDE_FLOAT64_C( 519.96), SIMDE_FLOAT64_C( 348.23) },
8223 {SIMDE_FLOAT64_C( -925.70), SIMDE_FLOAT64_C( 348.23) } }
8224 };
8225
8226 for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8227 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8228 simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b));
8229 simde_mm_storel_pd(r, test_vec[i].a);
8230 simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8231 }
8232
8233 return 0;
8234 }
8235
8236 static int
8237 test_simde_mm_storer_pd(SIMDE_MUNIT_TEST_ARGS) {
8238 const struct {
8239 simde__m128d a;
8240 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 b[2];
8241 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8242 } test_vec[8] = {
8243 { simde_mm_set_pd(SIMDE_FLOAT64_C( 765.90), SIMDE_FLOAT64_C( -392.20)),
8244 {SIMDE_FLOAT64_C( -898.96), SIMDE_FLOAT64_C( 810.87) },
8245 {SIMDE_FLOAT64_C( 765.90), SIMDE_FLOAT64_C( -392.20) } },
8246 { simde_mm_set_pd(SIMDE_FLOAT64_C( 848.80), SIMDE_FLOAT64_C( -20.45)),
8247 {SIMDE_FLOAT64_C( -298.33), SIMDE_FLOAT64_C( 199.86) },
8248 {SIMDE_FLOAT64_C( 848.80), SIMDE_FLOAT64_C( -20.45) } },
8249 { simde_mm_set_pd(SIMDE_FLOAT64_C( -770.38), SIMDE_FLOAT64_C( 73.29)),
8250 {SIMDE_FLOAT64_C( -471.45), SIMDE_FLOAT64_C( 85.53) },
8251 {SIMDE_FLOAT64_C( -770.38), SIMDE_FLOAT64_C( 73.29) } },
8252 { simde_mm_set_pd(SIMDE_FLOAT64_C( 181.32), SIMDE_FLOAT64_C( -528.68)),
8253 {SIMDE_FLOAT64_C( 925.12), SIMDE_FLOAT64_C( -79.25) },
8254 {SIMDE_FLOAT64_C( 181.32), SIMDE_FLOAT64_C( -528.68) } },
8255 { simde_mm_set_pd(SIMDE_FLOAT64_C( 786.51), SIMDE_FLOAT64_C( -396.45)),
8256 {SIMDE_FLOAT64_C( -196.75), SIMDE_FLOAT64_C( -493.37) },
8257 {SIMDE_FLOAT64_C( 786.51), SIMDE_FLOAT64_C( -396.45) } },
8258 { simde_mm_set_pd(SIMDE_FLOAT64_C( 379.82), SIMDE_FLOAT64_C( -482.63)),
8259 {SIMDE_FLOAT64_C( 356.61), SIMDE_FLOAT64_C( 6.76) },
8260 {SIMDE_FLOAT64_C( 379.82), SIMDE_FLOAT64_C( -482.63) } },
8261 { simde_mm_set_pd(SIMDE_FLOAT64_C( -597.31), SIMDE_FLOAT64_C( -427.66)),
8262 {SIMDE_FLOAT64_C( -787.49), SIMDE_FLOAT64_C( 322.82) },
8263 {SIMDE_FLOAT64_C( -597.31), SIMDE_FLOAT64_C( -427.66) } },
8264 { simde_mm_set_pd(SIMDE_FLOAT64_C( 515.42), SIMDE_FLOAT64_C( 801.05)),
8265 {SIMDE_FLOAT64_C( -892.50), SIMDE_FLOAT64_C( 794.29) },
8266 {SIMDE_FLOAT64_C( 515.42), SIMDE_FLOAT64_C( 801.05) } }
8267 };
8268
8269 for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8270 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8271 simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b));
8272 simde_mm_storer_pd(r, test_vec[i].a);
8273 simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8274 }
8275
8276 return 0;
8277 }
8278
8279 static int
8280 test_simde_mm_storeu_pd(SIMDE_MUNIT_TEST_ARGS) {
8281 const struct {
8282 simde__m128d a;
8283 simde_float64 b[2];
8284 simde_float64 r[2];
8285 } test_vec[8] = {
8286 { simde_mm_set_pd(SIMDE_FLOAT64_C( -787.29), SIMDE_FLOAT64_C( 410.40)),
8287 {SIMDE_FLOAT64_C( 579.61), SIMDE_FLOAT64_C( -320.32) },
8288 {SIMDE_FLOAT64_C( 410.40), SIMDE_FLOAT64_C( -787.29) } },
8289 { simde_mm_set_pd(SIMDE_FLOAT64_C( 944.41), SIMDE_FLOAT64_C( -149.27)),
8290 {SIMDE_FLOAT64_C( 850.87), SIMDE_FLOAT64_C( -993.24) },
8291 {SIMDE_FLOAT64_C( -149.27), SIMDE_FLOAT64_C( 944.41) } },
8292 { simde_mm_set_pd(SIMDE_FLOAT64_C( -415.98), SIMDE_FLOAT64_C( -916.88)),
8293 {SIMDE_FLOAT64_C( 966.39), SIMDE_FLOAT64_C( -183.52) },
8294 {SIMDE_FLOAT64_C( -916.88), SIMDE_FLOAT64_C( -415.98) } },
8295 { simde_mm_set_pd(SIMDE_FLOAT64_C( 431.98), SIMDE_FLOAT64_C( -691.20)),
8296 {SIMDE_FLOAT64_C( -659.73), SIMDE_FLOAT64_C( -34.04) },
8297 {SIMDE_FLOAT64_C( -691.20), SIMDE_FLOAT64_C( 431.98) } },
8298 { simde_mm_set_pd(SIMDE_FLOAT64_C( -483.35), SIMDE_FLOAT64_C( 766.13)),
8299 {SIMDE_FLOAT64_C( -638.61), SIMDE_FLOAT64_C( 157.38) },
8300 {SIMDE_FLOAT64_C( 766.13), SIMDE_FLOAT64_C( -483.35) } },
8301 { simde_mm_set_pd(SIMDE_FLOAT64_C( 386.12), SIMDE_FLOAT64_C( 330.08)),
8302 {SIMDE_FLOAT64_C( 588.80), SIMDE_FLOAT64_C( -111.35) },
8303 {SIMDE_FLOAT64_C( 330.08), SIMDE_FLOAT64_C( 386.12) } },
8304 { simde_mm_set_pd(SIMDE_FLOAT64_C( 45.12), SIMDE_FLOAT64_C( 964.86)),
8305 {SIMDE_FLOAT64_C( 199.95), SIMDE_FLOAT64_C( 998.07) },
8306 {SIMDE_FLOAT64_C( 964.86), SIMDE_FLOAT64_C( 45.12) } },
8307 { simde_mm_set_pd(SIMDE_FLOAT64_C( -30.76), SIMDE_FLOAT64_C( -723.78)),
8308 {SIMDE_FLOAT64_C( -8.78), SIMDE_FLOAT64_C( 410.81) },
8309 {SIMDE_FLOAT64_C( -723.78), SIMDE_FLOAT64_C( -30.76) } }
8310 };
8311
8312 for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8313 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8314 simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b));
8315 simde_mm_storeu_pd(r, test_vec[i].a);
8316 simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8317 }
8318
8319 return 0;
8320 }
8321
8322 static int
8323 test_simde_mm_storeu_si128(SIMDE_MUNIT_TEST_ARGS) {
8324 const struct {
8325 simde__m128i a;
8326 simde__m128i r;
8327 } test_vec[8] = {
8328 { simde_mm_set_epi32(INT32_C( 559775826), INT32_C( -953548695), INT32_C( 811731668), INT32_C( 717258119)),
8329 simde_mm_set_epi32(INT32_C( 559775826), INT32_C( -953548695), INT32_C( 811731668), INT32_C( 717258119)) },
8330 { simde_mm_set_epi32(INT32_C( -819991397), INT32_C(-1367017296), INT32_C( 1998597245), INT32_C( -194600747)),
8331 simde_mm_set_epi32(INT32_C( -819991397), INT32_C(-1367017296), INT32_C( 1998597245), INT32_C( -194600747)) },
8332 { simde_mm_set_epi32(INT32_C(-1983970353), INT32_C( 1036245224), INT32_C( 1208146280), INT32_C( 2086212378)),
8333 simde_mm_set_epi32(INT32_C(-1983970353), INT32_C( 1036245224), INT32_C( 1208146280), INT32_C( 2086212378)) },
8334 { simde_mm_set_epi32(INT32_C(-1115487208), INT32_C( 1901412157), INT32_C( -373768038), INT32_C( 1379732008)),
8335 simde_mm_set_epi32(INT32_C(-1115487208), INT32_C( 1901412157), INT32_C( -373768038), INT32_C( 1379732008)) },
8336 { simde_mm_set_epi32(INT32_C( -772363216), INT32_C( 1208166493), INT32_C( 2006133231), INT32_C( -567476934)),
8337 simde_mm_set_epi32(INT32_C( -772363216), INT32_C( 1208166493), INT32_C( 2006133231), INT32_C( -567476934)) },
8338 { simde_mm_set_epi32(INT32_C( -117502444), INT32_C( 175751722), INT32_C(-1353399970), INT32_C( -281466966)),
8339 simde_mm_set_epi32(INT32_C( -117502444), INT32_C( 175751722), INT32_C(-1353399970), INT32_C( -281466966)) },
8340 { simde_mm_set_epi32(INT32_C( 2118723593), INT32_C(-1657083210), INT32_C( 1907402314), INT32_C( 669913338)),
8341 simde_mm_set_epi32(INT32_C( 2118723593), INT32_C(-1657083210), INT32_C( 1907402314), INT32_C( 669913338)) },
8342 { simde_mm_set_epi32(INT32_C( 372135232), INT32_C( 1779530333), INT32_C(-1088754891), INT32_C( 1773872281)),
8343 simde_mm_set_epi32(INT32_C( 372135232), INT32_C( 1779530333), INT32_C(-1088754891), INT32_C( 1773872281)) }
8344 };
8345
8346 for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8347 simde__m128i r;
8348 simde_mm_storeu_si128(&r, test_vec[i].a);
8349 simde_assert_m128i_i64(r, ==, test_vec[i].r);
8350 }
8351
8352 return 0;
8353 }
8354
8355 static int
8356 test_simde_mm_storeu_si16 (SIMDE_MUNIT_TEST_ARGS) {
8357 static const struct {
8358 const int16_t a[8];
8359 const int16_t r;
8360 } test_vec[] = {
8361 { { -INT16_C( 6988), INT16_C( 26153), -INT16_C( 2289), -INT16_C( 8118), INT16_C( 29038), INT16_C( 9460), -INT16_C( 25910), -INT16_C( 2208) },
8362 -INT16_C( 6988) },
8363 { { INT16_C( 4717), -INT16_C( 14623), INT16_C( 14017), -INT16_C( 21548), INT16_C( 18960), -INT16_C( 20965), INT16_C( 24253), INT16_C( 29067) },
8364 INT16_C( 4717) },
8365 { { -INT16_C( 19389), INT16_C( 21207), INT16_C( 8619), INT16_C( 6450), INT16_C( 9874), INT16_C( 23869), -INT16_C( 25152), INT16_C( 11604) },
8366 -INT16_C( 19389) },
8367 { { INT16_C( 14000), INT16_C( 29171), -INT16_C( 14484), INT16_C( 31772), INT16_C( 14353), -INT16_C( 12758), -INT16_C( 19050), -INT16_C( 9920) },
8368 INT16_C( 14000) },
8369 { { INT16_C( 5993), INT16_C( 5163), INT16_C( 23865), -INT16_C( 13523), INT16_C( 27523), INT16_C( 17448), INT16_C( 32008), -INT16_C( 18319) },
8370 INT16_C( 5993) },
8371 { { INT16_C( 26035), INT16_C( 7977), INT16_C( 17964), INT16_C( 16027), -INT16_C( 14722), INT16_C( 5132), INT16_C( 19579), -INT16_C( 6674) },
8372 INT16_C( 26035) },
8373 { { INT16_C( 6500), -INT16_C( 25095), INT16_C( 10103), -INT16_C( 1432), -INT16_C( 28270), -INT16_C( 26050), -INT16_C( 20466), -INT16_C( 16045) },
8374 INT16_C( 6500) },
8375 { { INT16_C( 31765), INT16_C( 16864), INT16_C( 31682), INT16_C( 16511), -INT16_C( 29631), -INT16_C( 17067), INT16_C( 17368), INT16_C( 15522) },
8376 INT16_C( 31765) }
8377 };
8378
8379 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
8380 simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a);
8381 int16_t r;
8382 HEDLEY_CONCAT(simde,_mm_storeu_si16)(&r, a);
8383 simde_assert_equal_i16(r, test_vec[i].r);
8384 }
8385
8386 return 0;
8387 }
8388
8389 static int
8390 test_simde_mm_storeu_si32 (SIMDE_MUNIT_TEST_ARGS) {
8391 static const struct {
8392 const int32_t a[4];
8393 const int32_t r;
8394 } test_vec[] = {
8395 { { -INT32_C( 630341273), -INT32_C( 601100258), INT32_C( 527009452), INT32_C( 382213470) },
8396 -INT32_C( 630341273) },
8397 { { INT32_C( 733254901), INT32_C( 225181130), -INT32_C( 418546734), -INT32_C( 1459105470) },
8398 INT32_C( 733254901) },
8399 { { -INT32_C( 1333562222), INT32_C( 277655396), -INT32_C( 1825508043), INT32_C( 145356818) },
8400 -INT32_C( 1333562222) },
8401 { { INT32_C( 1446207116), INT32_C( 761503323), INT32_C( 1544843545), -INT32_C( 721085374) },
8402 INT32_C( 1446207116) },
8403 { { -INT32_C( 175797872), INT32_C( 1829048888), INT32_C( 436286727), -INT32_C( 1188910547) },
8404 -INT32_C( 175797872) },
8405 { { INT32_C( 1661949192), INT32_C( 227570676), INT32_C( 644457956), INT32_C( 1375432641) },
8406 INT32_C( 1661949192) },
8407 { { INT32_C( 809927160), -INT32_C( 1700967277), -INT32_C( 1347117439), INT32_C( 1365825097) },
8408 INT32_C( 809927160) },
8409 { { INT32_C( 548763692), -INT32_C( 819116565), -INT32_C( 1409968150), -INT32_C( 16912122) },
8410 INT32_C( 548763692) }
8411 };
8412
8413 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
8414 simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a);
8415 int32_t r;
8416 HEDLEY_CONCAT(simde,_mm_storeu_si32)(&r, a);
8417 simde_assert_equal_i32(r, test_vec[i].r);
8418 }
8419
8420 return 0;
8421 }
8422
8423 static int
8424 test_simde_mm_storeu_si64 (SIMDE_MUNIT_TEST_ARGS) {
8425 static const struct {
8426 const int64_t a[2];
8427 const int64_t r;
8428 } test_vec[] = {
8429 { { INT64_C( 1269957435069449074), INT64_C( 6198123151038108778) },
8430 INT64_C( 1269957435069449074) },
8431 { { -INT64_C( 1631810497504953952), -INT64_C( 5530541008416845765) },
8432 -INT64_C( 1631810497504953952) },
8433 { { -INT64_C( 6740103892576997931), -INT64_C( 59573331693324629) },
8434 -INT64_C( 6740103892576997931) },
8435 { { -INT64_C( 9008073061231320301), -INT64_C( 564917926918647499) },
8436 -INT64_C( 9008073061231320301) },
8437 { { -INT64_C( 1996551244505816721), INT64_C( 965994603972566793) },
8438 -INT64_C( 1996551244505816721) },
8439 { { INT64_C( 815745091936186761), -INT64_C( 8734544458042763860) },
8440 INT64_C( 815745091936186761) },
8441 { { INT64_C( 191535998296794507), -INT64_C( 3305974968983330281) },
8442 INT64_C( 191535998296794507) },
8443 { { INT64_C( 407001106525339075), INT64_C( 6676759969134880266) },
8444 INT64_C( 407001106525339075) }
8445 };
8446
8447 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
8448 simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a);
8449 int64_t r;
8450 HEDLEY_CONCAT(simde,_mm_storeu_si64)(&r, a);
8451 simde_assert_equal_i64(r, test_vec[i].r);
8452 }
8453
8454 return 0;
8455 }
8456
8457 static int
8458 test_simde_mm_store1_pd(SIMDE_MUNIT_TEST_ARGS) {
8459 const struct {
8460 simde__m128d a;
8461 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8462 } test_vec[8] = {
8463 { simde_mm_set_pd(SIMDE_FLOAT64_C( 291.96), SIMDE_FLOAT64_C( -70.45)),
8464 { SIMDE_FLOAT64_C( -70.45), SIMDE_FLOAT64_C( -70.45) } },
8465 { simde_mm_set_pd(SIMDE_FLOAT64_C( 896.84), SIMDE_FLOAT64_C( 840.00)),
8466 { SIMDE_FLOAT64_C( 840.00), SIMDE_FLOAT64_C( 840.00) } },
8467 { simde_mm_set_pd(SIMDE_FLOAT64_C( 792.86), SIMDE_FLOAT64_C( 559.02)),
8468 { SIMDE_FLOAT64_C( 559.02), SIMDE_FLOAT64_C( 559.02) } },
8469 { simde_mm_set_pd(SIMDE_FLOAT64_C( 401.40), SIMDE_FLOAT64_C( -245.84)),
8470 { SIMDE_FLOAT64_C( -245.84), SIMDE_FLOAT64_C( -245.84) } },
8471 { simde_mm_set_pd(SIMDE_FLOAT64_C( 441.21), SIMDE_FLOAT64_C( 731.20)),
8472 { SIMDE_FLOAT64_C( 731.20), SIMDE_FLOAT64_C( 731.20) } },
8473 { simde_mm_set_pd(SIMDE_FLOAT64_C( 569.26), SIMDE_FLOAT64_C( -434.33)),
8474 { SIMDE_FLOAT64_C( -434.33), SIMDE_FLOAT64_C( -434.33) } },
8475 { simde_mm_set_pd(SIMDE_FLOAT64_C( -796.24), SIMDE_FLOAT64_C( 534.91)),
8476 { SIMDE_FLOAT64_C( 534.91), SIMDE_FLOAT64_C( 534.91) } },
8477 { simde_mm_set_pd(SIMDE_FLOAT64_C( -221.59), SIMDE_FLOAT64_C( -372.35)),
8478 { SIMDE_FLOAT64_C( -372.35), SIMDE_FLOAT64_C( -372.35) } },
8479 };
8480
8481 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8482 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2] ;
8483 simde_mm_store1_pd(r, test_vec[i].a);
8484 simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8485 }
8486
8487 return 0;
8488 }
8489
8490 static int
8491 test_simde_mm_stream_pd(SIMDE_MUNIT_TEST_ARGS) {
8492 const struct {
8493 simde__m128d a;
8494 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8495 } test_vec[8] = {
8496 { simde_mm_set_pd(SIMDE_FLOAT64_C( -749.31), SIMDE_FLOAT64_C( -483.97)),
8497 {SIMDE_FLOAT64_C( -483.97), SIMDE_FLOAT64_C( -749.31) } },
8498 { simde_mm_set_pd(SIMDE_FLOAT64_C( 587.52), SIMDE_FLOAT64_C( -903.15)),
8499 {SIMDE_FLOAT64_C( -903.15), SIMDE_FLOAT64_C( 587.52) } },
8500 { simde_mm_set_pd(SIMDE_FLOAT64_C( -515.61), SIMDE_FLOAT64_C( 144.37)),
8501 {SIMDE_FLOAT64_C( 144.37), SIMDE_FLOAT64_C( -515.61) } },
8502 { simde_mm_set_pd(SIMDE_FLOAT64_C( -183.60), SIMDE_FLOAT64_C( 483.36)),
8503 {SIMDE_FLOAT64_C( 483.36), SIMDE_FLOAT64_C( -183.60) } },
8504 { simde_mm_set_pd(SIMDE_FLOAT64_C( 33.37), SIMDE_FLOAT64_C( -802.26)),
8505 {SIMDE_FLOAT64_C( -802.26), SIMDE_FLOAT64_C( 33.37) } },
8506 { simde_mm_set_pd(SIMDE_FLOAT64_C( -131.42), SIMDE_FLOAT64_C( -156.48)),
8507 {SIMDE_FLOAT64_C( -156.48), SIMDE_FLOAT64_C( -131.42) } },
8508 { simde_mm_set_pd(SIMDE_FLOAT64_C( -317.87), SIMDE_FLOAT64_C( 140.87)),
8509 {SIMDE_FLOAT64_C( 140.87), SIMDE_FLOAT64_C( -317.87) } },
8510 { simde_mm_set_pd(SIMDE_FLOAT64_C( 924.07), SIMDE_FLOAT64_C( 709.42)),
8511 {SIMDE_FLOAT64_C( 709.42), SIMDE_FLOAT64_C( 924.07) } }
8512 };
8513
8514 for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8515 SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8516 simde_mm_stream_pd(r, test_vec[i].a);
8517 simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1);
8518 }
8519
8520 return 0;
8521 }
8522
8523 static int
8524 test_simde_mm_stream_si128(SIMDE_MUNIT_TEST_ARGS) {
8525 const struct {
8526 simde__m128i a;
8527 simde__m128i r;
8528 } test_vec[8] = {
8529 { simde_mm_set_epi32(INT32_C( 34091183), INT32_C( 572850908), INT32_C( 428781754), INT32_C(-1984722387)),
8530 simde_mm_set_epi32(INT32_C( 34091183), INT32_C( 572850908), INT32_C( 428781754), INT32_C(-1984722387)) },
8531 { simde_mm_set_epi32(INT32_C( 2059236852), INT32_C( 436410728), INT32_C( 338757718), INT32_C( 1985336145)),
8532 simde_mm_set_epi32(INT32_C( 2059236852), INT32_C( 436410728), INT32_C( 338757718), INT32_C( 1985336145)) },
8533 { simde_mm_set_epi32(INT32_C( -559686487), INT32_C( 981390363), INT32_C( 629822759), INT32_C( 26629572)),
8534 simde_mm_set_epi32(INT32_C( -559686487), INT32_C( 981390363), INT32_C( 629822759), INT32_C( 26629572)) },
8535 { simde_mm_set_epi32(INT32_C( 1401959784), INT32_C( -900492538), INT32_C( -328421218), INT32_C( 452144845)),
8536 simde_mm_set_epi32(INT32_C( 1401959784), INT32_C( -900492538), INT32_C( -328421218), INT32_C( 452144845)) },
8537 { simde_mm_set_epi32(INT32_C( 1914664610), INT32_C( 1467736241), INT32_C(-2062482935), INT32_C(-1765775255)),
8538 simde_mm_set_epi32(INT32_C( 1914664610), INT32_C( 1467736241), INT32_C(-2062482935), INT32_C(-1765775255)) },
8539 { simde_mm_set_epi32(INT32_C( 659730578), INT32_C( 874862437), INT32_C( -487086426), INT32_C(-1161523548)),
8540 simde_mm_set_epi32(INT32_C( 659730578), INT32_C( 874862437), INT32_C( -487086426), INT32_C(-1161523548)) },
8541 { simde_mm_set_epi32(INT32_C( 1195652072), INT32_C( -415424127), INT32_C( 77100736), INT32_C( 1699618155)),
8542 simde_mm_set_epi32(INT32_C( 1195652072), INT32_C( -415424127), INT32_C( 77100736), INT32_C( 1699618155)) },
8543 { simde_mm_set_epi32(INT32_C( 1626943139), INT32_C( 1327578602), INT32_C(-1477047999), INT32_C( 1569415359)),
8544 simde_mm_set_epi32(INT32_C( 1626943139), INT32_C( 1327578602), INT32_C(-1477047999), INT32_C( 1569415359)) }
8545 };
8546
8547 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8548 simde__m128i r;
8549 simde_mm_stream_si128(&r, test_vec[i].a);
8550 simde_assert_m128i_i32(r, ==, test_vec[i].r);
8551 }
8552
8553 return 0;
8554 }
8555
/* Exercises simde_mm_stream_si32.
 *
 * Every case stores the scalar `a` through a pointer to a local int32_t
 * and requires the result to equal `r`; each row lists the same value for
 * `a` and `r`.
 *
 * Returns 0 when all cases pass. */
static int
test_simde_mm_stream_si32(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    int32_t a;
    int32_t r;
  } test_vec[8] = {
    {  -895547977,  -895547977 },
    {  1712937231,  1712937231 },
    { -1086654689, -1086654689 },
    {  1855506850,  1855506850 },
    {  1870001810,  1870001810 },
    {  -396094407,  -396094407 },
    { -1262223993, -1262223993 },
    {  2015532253,  2015532253 }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    int32_t r;

    simde_mm_stream_si32(&r, test_vec[i].a);
    simde_assert_equal_i32(r, test_vec[i].r);
  }

  return 0;
}
8580
/* Exercises simde_mm_stream_si64.
 *
 * Every case stores the scalar `a` through a pointer to a local int64_t
 * and requires the result to equal `r`; each row lists the same value for
 * `a` and `r`.
 *
 * Returns 0 when all cases pass. */
static int
test_simde_mm_stream_si64(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    int64_t a;
    int64_t r;
  } test_vec[8] = {
    { INT64_C(-908741869362791955),  INT64_C(-908741869362791955) },
    { INT64_C(6977779886002528513),  INT64_C(6977779886002528513) },
    { INT64_C(-3803748866185605675), INT64_C(-3803748866185605675) },
    { INT64_C(9126491633461219066),  INT64_C(9126491633461219066) },
    { INT64_C(-1680016917440909978), INT64_C(-1680016917440909978) },
    { INT64_C(9194247506078439345),  INT64_C(9194247506078439345) },
    { INT64_C(-5911248664473270680), INT64_C(-5911248664473270680) },
    { INT64_C(-9131883318362768052), INT64_C(-9131883318362768052) }
  };
  const size_t n_cases = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    int64_t r;

    /* With native SSE2 and native-alias testing both enabled, the
     * destination pointer is passed as long long int*. */
    #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_NATIVE_ALIASES_TESTING)
      simde_mm_stream_si64(HEDLEY_REINTERPRET_CAST(long long int*, &r), test_vec[i].a);
    #else
      simde_mm_stream_si64(&r, test_vec[i].a);
    #endif

    simde_assert_equal_i64(r, test_vec[i].r);
  }

  return 0;
}
8611
8612 static int
8613 test_simde_mm_sub_epi8(SIMDE_MUNIT_TEST_ARGS) {
8614 const struct {
8615 simde__m128i a;
8616 simde__m128i b;
8617 simde__m128i r;
8618 } test_vec[8] = {
8619 { simde_mm_set_epi8(INT8_C(-114), INT8_C(-102), INT8_C( -21), INT8_C( 93),
8620 INT8_C(-120), INT8_C( 125), INT8_C( -36), INT8_C( -23),
8621 INT8_C( -12), INT8_C( 11), INT8_C( 22), INT8_C( 78),
8622 INT8_C( -16), INT8_C( 57), INT8_C( 71), INT8_C( 112)),
8623 simde_mm_set_epi8(INT8_C( -15), INT8_C( -47), INT8_C( 8), INT8_C( 73),
8624 INT8_C( 45), INT8_C( 37), INT8_C( 27), INT8_C( -63),
8625 INT8_C( -74), INT8_C( -76), INT8_C( -34), INT8_C( 78),
8626 INT8_C( -50), INT8_C( 121), INT8_C(-113), INT8_C(-123)),
8627 simde_mm_set_epi8(INT8_C( -99), INT8_C( -55), INT8_C( -29), INT8_C( 20),
8628 INT8_C( 91), INT8_C( 88), INT8_C( -63), INT8_C( 40),
8629 INT8_C( 62), INT8_C( 87), INT8_C( 56), INT8_C( 0),
8630 INT8_C( 34), INT8_C( -64), INT8_C( -72), INT8_C( -21)) },
8631 { simde_mm_set_epi8(INT8_C( -71), INT8_C( -77), INT8_C( -40), INT8_C( 99),
8632 INT8_C( -40), INT8_C( 24), INT8_C( 45), INT8_C( 125),
8633 INT8_C( 16), INT8_C( 82), INT8_C( -66), INT8_C( -93),
8634 INT8_C( 92), INT8_C( 60), INT8_C( 65), INT8_C( 70)),
8635 simde_mm_set_epi8(INT8_C( -15), INT8_C( -11), INT8_C( 41), INT8_C( 35),
8636 INT8_C( 87), INT8_C( -22), INT8_C( -28), INT8_C( -74),
8637 INT8_C( 88), INT8_C(-100), INT8_C( 28), INT8_C( -30),
8638 INT8_C( 122), INT8_C( -93), INT8_C( -11), INT8_C( 47)),
8639 simde_mm_set_epi8(INT8_C( -56), INT8_C( -66), INT8_C( -81), INT8_C( 64),
8640 INT8_C(-127), INT8_C( 46), INT8_C( 73), INT8_C( -57),
8641 INT8_C( -72), INT8_C( -74), INT8_C( -94), INT8_C( -63),
8642 INT8_C( -30), INT8_C(-103), INT8_C( 76), INT8_C( 23)) },
8643 { simde_mm_set_epi8(INT8_C( 88), INT8_C( 7), INT8_C( 21), INT8_C( 40),
8644 INT8_C( -45), INT8_C( -52), INT8_C( 105), INT8_C( 9),
8645 INT8_C( -65), INT8_C( -48), INT8_C( 74), INT8_C( -11),
8646 INT8_C( 71), INT8_C( -73), INT8_C( -92), INT8_C(-128)),
8647 simde_mm_set_epi8(INT8_C( 51), INT8_C( -25), INT8_C( 14), INT8_C( -31),
8648 INT8_C( -75), INT8_C( 81), INT8_C( 123), INT8_C( -32),
8649 INT8_C( -73), INT8_C(-121), INT8_C( 36), INT8_C( -43),
8650 INT8_C( 95), INT8_C( -5), INT8_C( 71), INT8_C( -67)),
8651 simde_mm_set_epi8(INT8_C( 37), INT8_C( 32), INT8_C( 7), INT8_C( 71),
8652 INT8_C( 30), INT8_C( 123), INT8_C( -18), INT8_C( 41),
8653 INT8_C( 8), INT8_C( 73), INT8_C( 38), INT8_C( 32),
8654 INT8_C( -24), INT8_C( -68), INT8_C( 93), INT8_C( -61)) },
8655 { simde_mm_set_epi8(INT8_C( -26), INT8_C( -30), INT8_C(-127), INT8_C( -96),
8656 INT8_C( -93), INT8_C( 85), INT8_C( -61), INT8_C( 31),
8657 INT8_C( 84), INT8_C( 86), INT8_C( 14), INT8_C( 51),
8658 INT8_C( -75), INT8_C( -80), INT8_C( 35), INT8_C( 49)),
8659 simde_mm_set_epi8(INT8_C(-102), INT8_C( 55), INT8_C( 103), INT8_C( 19),
8660 INT8_C(-107), INT8_C( -66), INT8_C(-128), INT8_C( 92),
8661 INT8_C(-108), INT8_C( -59), INT8_C( -55), INT8_C( 84),
8662 INT8_C( -42), INT8_C( 42), INT8_C( -85), INT8_C( -73)),
8663 simde_mm_set_epi8(INT8_C( 76), INT8_C( -85), INT8_C( 26), INT8_C(-115),
8664 INT8_C( 14), INT8_C(-105), INT8_C( 67), INT8_C( -61),
8665 INT8_C( -64), INT8_C(-111), INT8_C( 69), INT8_C( -33),
8666 INT8_C( -33), INT8_C(-122), INT8_C( 120), INT8_C( 122)) },
8667 { simde_mm_set_epi8(INT8_C( -92), INT8_C( 56), INT8_C( -22), INT8_C( -76),
8668 INT8_C( -77), INT8_C(-116), INT8_C( -11), INT8_C( 34),
8669 INT8_C( -7), INT8_C( 37), INT8_C( -64), INT8_C( -72),
8670 INT8_C( 28), INT8_C(-107), INT8_C(-128), INT8_C(-117)),
8671 simde_mm_set_epi8(INT8_C( -56), INT8_C( -3), INT8_C( 32), INT8_C( 22),
8672 INT8_C( 49), INT8_C(-125), INT8_C( 122), INT8_C( -3),
8673 INT8_C(-111), INT8_C( 65), INT8_C( -17), INT8_C( 15),
8674 INT8_C( -83), INT8_C( -49), INT8_C( 13), INT8_C( 98)),
8675 simde_mm_set_epi8(INT8_C( -36), INT8_C( 59), INT8_C( -54), INT8_C( -98),
8676 INT8_C(-126), INT8_C( 9), INT8_C( 123), INT8_C( 37),
8677 INT8_C( 104), INT8_C( -28), INT8_C( -47), INT8_C( -87),
8678 INT8_C( 111), INT8_C( -58), INT8_C( 115), INT8_C( 41)) },
8679 { simde_mm_set_epi8(INT8_C(-104), INT8_C( 9), INT8_C( 90), INT8_C( -26),
8680 INT8_C(-114), INT8_C(-100), INT8_C( -19), INT8_C( 82),
8681 INT8_C( 96), INT8_C( 58), INT8_C( 39), INT8_C( 9),
8682 INT8_C( -4), INT8_C( 91), INT8_C( -93), INT8_C( -73)),
8683 simde_mm_set_epi8(INT8_C( 16), INT8_C( 2), INT8_C( -9), INT8_C( 107),
8684 INT8_C(-122), INT8_C(-106), INT8_C( -7), INT8_C( 11),
8685 INT8_C( 116), INT8_C( -40), INT8_C( -9), INT8_C( -94),
8686 INT8_C( 61), INT8_C( -90), INT8_C( 69), INT8_C( 0)),
8687 simde_mm_set_epi8(INT8_C(-120), INT8_C( 7), INT8_C( 99), INT8_C( 123),
8688 INT8_C( 8), INT8_C( 6), INT8_C( -12), INT8_C( 71),
8689 INT8_C( -20), INT8_C( 98), INT8_C( 48), INT8_C( 103),
8690 INT8_C( -65), INT8_C( -75), INT8_C( 94), INT8_C( -73)) },
8691 { simde_mm_set_epi8(INT8_C( -61), INT8_C( -71), INT8_C( 103), INT8_C( 76),
8692 INT8_C( 44), INT8_C( 98), INT8_C( 70), INT8_C(-120),
8693 INT8_C( 17), INT8_C( 126), INT8_C( -43), INT8_C( 108),
8694 INT8_C( -31), INT8_C( 12), INT8_C( -92), INT8_C( -28)),
8695 simde_mm_set_epi8(INT8_C(-114), INT8_C( 71), INT8_C( -5), INT8_C( -9),
8696 INT8_C( -6), INT8_C( 117), INT8_C( -23), INT8_C( -62),
8697 INT8_C( -10), INT8_C( -22), INT8_C( 106), INT8_C( 35),
8698 INT8_C( -63), INT8_C( 18), INT8_C( 58), INT8_C( 22)),
8699 simde_mm_set_epi8(INT8_C( 53), INT8_C( 114), INT8_C( 108), INT8_C( 85),
8700 INT8_C( 50), INT8_C( -19), INT8_C( 93), INT8_C( -58),
8701 INT8_C( 27), INT8_C(-108), INT8_C( 107), INT8_C( 73),
8702 INT8_C( 32), INT8_C( -6), INT8_C( 106), INT8_C( -50)) },
8703 { simde_mm_set_epi8(INT8_C( 19), INT8_C( -54), INT8_C( 71), INT8_C( 0),
8704 INT8_C( -13), INT8_C( 85), INT8_C( 113), INT8_C( 7),
8705 INT8_C( -78), INT8_C(-122), INT8_C( -69), INT8_C( -15),
8706 INT8_C( -57), INT8_C( -9), INT8_C(-125), INT8_C( 84)),
8707 simde_mm_set_epi8(INT8_C( -78), INT8_C( 106), INT8_C(-106), INT8_C( 60),
8708 INT8_C( 36), INT8_C( 103), INT8_C( -55), INT8_C( 69),
8709 INT8_C(-119), INT8_C( -53), INT8_C( 67), INT8_C( -86),
8710 INT8_C( -37), INT8_C( -20), INT8_C( -58), INT8_C( -28)),
8711 simde_mm_set_epi8(INT8_C( 97), INT8_C( 96), INT8_C( -79), INT8_C( -60),
8712 INT8_C( -49), INT8_C( -18), INT8_C( -88), INT8_C( -62),
8713 INT8_C( 41), INT8_C( -69), INT8_C( 120), INT8_C( 71),
8714 INT8_C( -20), INT8_C( 11), INT8_C( -67), INT8_C( 112)) }
8715 };
8716
8717 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8718 simde__m128i r = simde_mm_sub_epi8(test_vec[i].a, test_vec[i].b);
8719 simde_assert_m128i_i8(r, ==, test_vec[i].r);
8720 }
8721
8722 return 0;
8723 }
8724
8725 static int
8726 test_simde_mm_sub_epi16(SIMDE_MUNIT_TEST_ARGS) {
8727 const struct {
8728 simde__m128i a;
8729 simde__m128i b;
8730 simde__m128i r;
8731 } test_vec[8] = {
8732 { simde_mm_set_epi16(INT16_C( 4649), INT16_C(-10562), INT16_C(-25917), INT16_C( 9425),
8733 INT16_C( 27983), INT16_C( -7888), INT16_C( 3337), INT16_C(-19608)),
8734 simde_mm_set_epi16(INT16_C(-13637), INT16_C( -2631), INT16_C( 26607), INT16_C( 19784),
8735 INT16_C(-32723), INT16_C(-19066), INT16_C( 18245), INT16_C(-23690)),
8736 simde_mm_set_epi16(INT16_C( 18286), INT16_C( -7931), INT16_C( 13012), INT16_C(-10359),
8737 INT16_C( -4830), INT16_C( 11178), INT16_C(-14908), INT16_C( 4082)) },
8738 { simde_mm_set_epi16(INT16_C( 708), INT16_C( 11434), INT16_C( -1239), INT16_C(-25521),
8739 INT16_C(-21333), INT16_C( 14389), INT16_C( 1705), INT16_C( 20680)),
8740 simde_mm_set_epi16(INT16_C(-28483), INT16_C( 8156), INT16_C(-22073), INT16_C( 17984),
8741 INT16_C( 20902), INT16_C( 3569), INT16_C( 31387), INT16_C( 7806)),
8742 simde_mm_set_epi16(INT16_C( 29191), INT16_C( 3278), INT16_C( 20834), INT16_C( 22031),
8743 INT16_C( 23301), INT16_C( 10820), INT16_C(-29682), INT16_C( 12874)) },
8744 { simde_mm_set_epi16(INT16_C( -3626), INT16_C( 757), INT16_C( 189), INT16_C(-19968),
8745 INT16_C( 5676), INT16_C( 7663), INT16_C( 8524), INT16_C( 15372)),
8746 simde_mm_set_epi16(INT16_C( 20254), INT16_C(-31977), INT16_C( 18332), INT16_C(-14379),
8747 INT16_C( -7613), INT16_C( 19737), INT16_C( 22035), INT16_C( -6952)),
8748 simde_mm_set_epi16(INT16_C(-23880), INT16_C( 32734), INT16_C(-18143), INT16_C( -5589),
8749 INT16_C( 13289), INT16_C(-12074), INT16_C(-13511), INT16_C( 22324)) },
8750 { simde_mm_set_epi16(INT16_C(-12411), INT16_C( 25999), INT16_C( 8485), INT16_C( -8542),
8751 INT16_C( 21018), INT16_C(-31213), INT16_C( 15766), INT16_C( 18574)),
8752 simde_mm_set_epi16(INT16_C( 6484), INT16_C(-10154), INT16_C( 20175), INT16_C( 32085),
8753 INT16_C( 18950), INT16_C(-19405), INT16_C(-12089), INT16_C( 8199)),
8754 simde_mm_set_epi16(INT16_C(-18895), INT16_C(-29383), INT16_C(-11690), INT16_C( 24909),
8755 INT16_C( 2068), INT16_C(-11808), INT16_C( 27855), INT16_C( 10375)) },
8756 { simde_mm_set_epi16(INT16_C( 7148), INT16_C(-25537), INT16_C( 5647), INT16_C(-25529),
8757 INT16_C( -5324), INT16_C(-12025), INT16_C( 27072), INT16_C(-30360)),
8758 simde_mm_set_epi16(INT16_C(-24506), INT16_C( -9630), INT16_C( 25801), INT16_C( 32734),
8759 INT16_C( 1516), INT16_C( 10059), INT16_C( 10693), INT16_C( 13623)),
8760 simde_mm_set_epi16(INT16_C( 31654), INT16_C(-15907), INT16_C(-20154), INT16_C( 7273),
8761 INT16_C( -6840), INT16_C(-22084), INT16_C( 16379), INT16_C( 21553)) },
8762 { simde_mm_set_epi16(INT16_C(-24730), INT16_C(-23496), INT16_C(-16567), INT16_C(-13323),
8763 INT16_C(-12986), INT16_C(-31808), INT16_C( 27730), INT16_C( -2264)),
8764 simde_mm_set_epi16(INT16_C(-13737), INT16_C(-18451), INT16_C(-16289), INT16_C( 22307),
8765 INT16_C( -2961), INT16_C( 22412), INT16_C( 13917), INT16_C( 8259)),
8766 simde_mm_set_epi16(INT16_C(-10993), INT16_C( -5045), INT16_C( -278), INT16_C( 29906),
8767 INT16_C(-10025), INT16_C( 11316), INT16_C( 13813), INT16_C(-10523)) },
8768 { simde_mm_set_epi16(INT16_C( 5718), INT16_C( 31027), INT16_C( 29094), INT16_C( 1906),
8769 INT16_C( -3938), INT16_C( -2339), INT16_C(-13536), INT16_C( 11931)),
8770 simde_mm_set_epi16(INT16_C(-23545), INT16_C( 2546), INT16_C( -2953), INT16_C( -8072),
8771 INT16_C( 28237), INT16_C(-11239), INT16_C(-13996), INT16_C( 29497)),
8772 simde_mm_set_epi16(INT16_C( 29263), INT16_C( 28481), INT16_C( 32047), INT16_C( 9978),
8773 INT16_C(-32175), INT16_C( 8900), INT16_C( 460), INT16_C(-17566)) },
8774 { simde_mm_set_epi16(INT16_C( 29491), INT16_C(-30965), INT16_C( 4748), INT16_C(-28809),
8775 INT16_C(-21877), INT16_C(-21669), INT16_C(-28233), INT16_C(-28758)),
8776 simde_mm_set_epi16(INT16_C( 5029), INT16_C( 4694), INT16_C(-16956), INT16_C(-15561),
8777 INT16_C(-23049), INT16_C(-31774), INT16_C( 3835), INT16_C(-12557)),
8778 simde_mm_set_epi16(INT16_C( 24462), INT16_C( 29877), INT16_C( 21704), INT16_C(-13248),
8779 INT16_C( 1172), INT16_C( 10105), INT16_C(-32068), INT16_C(-16201)) }
8780 };
8781
8782 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8783 simde__m128i r = simde_mm_sub_epi16(test_vec[i].a, test_vec[i].b);
8784 simde_assert_m128i_i16(r, ==, test_vec[i].r);
8785 }
8786
8787 return 0;
8788 }
8789
8790 static int
8791 test_simde_mm_sub_epi32(SIMDE_MUNIT_TEST_ARGS) {
8792 const struct {
8793 simde__m128i a;
8794 simde__m128i b;
8795 simde__m128i r;
8796 } test_vec[8] = {
8797 { simde_mm_set_epi32( 304731838, -1698487087, 1833951536, 218739560),
8798 simde_mm_set_epi32( -893651527, 1743736136, -2144488058, 1195746166),
8799 simde_mm_set_epi32( 1198383365, 852744073, -316527702, -977006606) },
8800 { simde_mm_set_epi32( 46410922, -81159089, -1398065099, 111759560),
8801 simde_mm_set_epi32(-1866653732, -1446558144, 1369837041, 2056986238),
8802 simde_mm_set_epi32( 1913064654, 1365399055, 1527065156, -1945226678) },
8803 { simde_mm_set_epi32( -237632779, 12431872, 371989999, 558644236),
8804 simde_mm_set_epi32( 1327399703, 1201457109, -498905831, 1444144344),
8805 simde_mm_set_epi32(-1565032482, -1189025237, 870895830, -885500108) },
8806 { simde_mm_set_epi32( -813341297, 556129954, 1377469971, 1033259150),
8807 simde_mm_set_epi32( 424990806, 1322220885, 1241953331, -792256505),
8808 simde_mm_set_epi32(-1238332103, -766090931, 135516640, 1825515655) },
8809 { simde_mm_set_epi32( 468491327, 370121799, -348860153, 1774225768),
8810 simde_mm_set_epi32(-1605969310, 1690927070, 99362635, 700790071),
8811 simde_mm_set_epi32( 2074460637, -1320805271, -448222788, 1073435697) },
8812 { simde_mm_set_epi32(-1620663240, -1085682699, -851016768, 1817376552),
8813 simde_mm_set_epi32( -900220947, -1067493597, -194029684, 912072771),
8814 simde_mm_set_epi32( -720442293, -18189102, -656987084, 905303781) },
8815 { simde_mm_set_epi32( 374765875, 1906706290, -258017571, -887083365),
8816 simde_mm_set_epi32(-1543042574, -193470344, 1850594329, -917212359),
8817 simde_mm_set_epi32( 1917808449, 2100176634, -2108611900, 30128994) },
8818 { simde_mm_set_epi32( 1932756747, 311201655, -1433687205, -1850241110),
8819 simde_mm_set_epi32( 329585238, -1111178441, -1510505502, 251383539),
8820 simde_mm_set_epi32( 1603171509, 1422380096, 76818297, -2101624649) }
8821 };
8822
8823 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8824 simde__m128i r = simde_mm_sub_epi32(test_vec[i].a, test_vec[i].b);
8825 simde_assert_m128i_i32(r, ==, test_vec[i].r);
8826 }
8827
8828 return 0;
8829 }
8830
8831 static int
8832 test_simde_mm_sub_epi64(SIMDE_MUNIT_TEST_ARGS) {
8833 const struct {
8834 simde__m128i a;
8835 simde__m128i b;
8836 simde__m128i r;
8837 } test_vec[8] = {
8838 { simde_mm_set_epi64x(INT64_C(-5763845342482697816), INT64_C( 2103077785434280804)),
8839 simde_mm_set_epi64x(INT64_C( -84933559585222060), INT64_C(-2626653918467514964)),
8840 simde_mm_set_epi64x(INT64_C(-5678911782897475756), INT64_C( 4729731703901795768)) },
8841 { simde_mm_set_epi64x(INT64_C( 1527789798480118137), INT64_C( 8436112421047310932)),
8842 simde_mm_set_epi64x(INT64_C(-1700732467797798250), INT64_C(-3973336518996013340)),
8843 simde_mm_set_epi64x(INT64_C( 3228522266277916387), INT64_C(-6037295133666227344)) },
8844 { simde_mm_set_epi64x(INT64_C( 4851345631989659335), INT64_C(-7206764788471565568)),
8845 simde_mm_set_epi64x(INT64_C( 5406657072094052149), INT64_C( 1553917979932899417)),
8846 simde_mm_set_epi64x(INT64_C( -555311440104392814), INT64_C(-8760682768404464985)) },
8847 { simde_mm_set_epi64x(INT64_C( 4880585840903485916), INT64_C(-3214111508108965857)),
8848 simde_mm_set_epi64x(INT64_C(-9030069389987018552), INT64_C(-3395779442469856546)),
8849 simde_mm_set_epi64x(INT64_C(-4536088842819047148), INT64_C( 181667934360890689)) },
8850 { simde_mm_set_epi64x(INT64_C( 5848110560047382754), INT64_C( 5491947693722128435)),
8851 simde_mm_set_epi64x(INT64_C( 213782131019667117), INT64_C( -937970910639813333)),
8852 simde_mm_set_epi64x(INT64_C( 5634328429027715637), INT64_C( 6429918604361941768)) },
8853 { simde_mm_set_epi64x(INT64_C(-1470278109522038956), INT64_C(-7185464081677005028)),
8854 simde_mm_set_epi64x(INT64_C( 7512013344600346304), INT64_C(-1151368750409397152)),
8855 simde_mm_set_epi64x(INT64_C(-8982291454122385260), INT64_C(-6034095331267607876)) },
8856 { simde_mm_set_epi64x(INT64_C(-1414880571892272072), INT64_C(-5464559564131319132)),
8857 simde_mm_set_epi64x(INT64_C(-8460263392275774431), INT64_C( 3444946385257741717)),
8858 simde_mm_set_epi64x(INT64_C( 7045382820383502359), INT64_C(-8909505949389060849)) },
8859 { simde_mm_set_epi64x(INT64_C( 2041037654020608990), INT64_C(-5135476174064773616)),
8860 simde_mm_set_epi64x(INT64_C(-2250411574230731306), INT64_C( 6301008926808412830)),
8861 simde_mm_set_epi64x(INT64_C( 4291449228251340296), INT64_C( 7010258972836365170)) }
8862 };
8863
8864 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8865 simde__m128i r = simde_mm_sub_epi64(test_vec[i].a, test_vec[i].b);
8866 simde_assert_m128i_i64(r, ==, test_vec[i].r);
8867 }
8868
8869 return 0;
8870 }
8871
8872 static int
8873 test_simde_mm_sub_pd(SIMDE_MUNIT_TEST_ARGS) {
8874 const struct {
8875 simde__m128d a;
8876 simde__m128d b;
8877 simde__m128d r;
8878 } test_vec[8] = {
8879 { simde_mm_set_pd(SIMDE_FLOAT64_C( -989.09), SIMDE_FLOAT64_C( 415.70)),
8880 simde_mm_set_pd(SIMDE_FLOAT64_C( -630.71), SIMDE_FLOAT64_C( 755.53)),
8881 simde_mm_set_pd(SIMDE_FLOAT64_C( -358.38), SIMDE_FLOAT64_C( -339.83)) },
8882 { simde_mm_set_pd(SIMDE_FLOAT64_C( -609.69), SIMDE_FLOAT64_C( -266.09)),
8883 simde_mm_set_pd(SIMDE_FLOAT64_C( 904.74), SIMDE_FLOAT64_C( 704.00)),
8884 simde_mm_set_pd(SIMDE_FLOAT64_C(-1514.43), SIMDE_FLOAT64_C( -970.09)) },
8885 { simde_mm_set_pd(SIMDE_FLOAT64_C( -864.69), SIMDE_FLOAT64_C( -728.75)),
8886 simde_mm_set_pd(SIMDE_FLOAT64_C( -549.96), SIMDE_FLOAT64_C( 478.05)),
8887 simde_mm_set_pd(SIMDE_FLOAT64_C( -314.73), SIMDE_FLOAT64_C(-1206.80)) },
8888 { simde_mm_set_pd(SIMDE_FLOAT64_C( -607.45), SIMDE_FLOAT64_C( -593.32)),
8889 simde_mm_set_pd(SIMDE_FLOAT64_C( -648.70), SIMDE_FLOAT64_C( -195.04)),
8890 simde_mm_set_pd(SIMDE_FLOAT64_C( 41.24), SIMDE_FLOAT64_C( -398.28)) },
8891 { simde_mm_set_pd(SIMDE_FLOAT64_C( -442.58), SIMDE_FLOAT64_C( -296.11)),
8892 simde_mm_set_pd(SIMDE_FLOAT64_C( 195.46), SIMDE_FLOAT64_C( 287.25)),
8893 simde_mm_set_pd(SIMDE_FLOAT64_C( -638.04), SIMDE_FLOAT64_C( -583.37)) },
8894 { simde_mm_set_pd(SIMDE_FLOAT64_C( -930.71), SIMDE_FLOAT64_C( 996.22)),
8895 simde_mm_set_pd(SIMDE_FLOAT64_C( -786.74), SIMDE_FLOAT64_C( 77.74)),
8896 simde_mm_set_pd(SIMDE_FLOAT64_C( -143.98), SIMDE_FLOAT64_C( 918.47)) },
8897 { simde_mm_set_pd(SIMDE_FLOAT64_C( 702.75), SIMDE_FLOAT64_C( -28.87)),
8898 simde_mm_set_pd(SIMDE_FLOAT64_C( 970.37), SIMDE_FLOAT64_C( -443.97)),
8899 simde_mm_set_pd(SIMDE_FLOAT64_C( -267.62), SIMDE_FLOAT64_C( 415.10)) },
8900 { simde_mm_set_pd(SIMDE_FLOAT64_C( -369.79), SIMDE_FLOAT64_C( 539.64)),
8901 simde_mm_set_pd(SIMDE_FLOAT64_C( -404.57), SIMDE_FLOAT64_C( -587.93)),
8902 simde_mm_set_pd(SIMDE_FLOAT64_C( 34.78), SIMDE_FLOAT64_C( 1127.56)) }
8903 };
8904
8905 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8906 simde__m128d r = simde_mm_sub_pd(test_vec[i].a, test_vec[i].b);
8907 simde_assert_m128d_close(r, test_vec[i].r, 1);
8908 }
8909
8910 return 0;
8911 }
8912
8913 static int
8914 test_simde_mm_sub_sd(SIMDE_MUNIT_TEST_ARGS) {
8915 const struct {
8916 simde__m128d a;
8917 simde__m128d b;
8918 simde__m128d r;
8919 } test_vec[8] = {
8920 { simde_mm_set_pd(SIMDE_FLOAT64_C( -989.09), SIMDE_FLOAT64_C( 415.70)),
8921 simde_mm_set_pd(SIMDE_FLOAT64_C( -630.71), SIMDE_FLOAT64_C( 755.53)),
8922 simde_mm_set_pd(SIMDE_FLOAT64_C( -989.09), SIMDE_FLOAT64_C( -339.83)) },
8923 { simde_mm_set_pd(SIMDE_FLOAT64_C( -609.69), SIMDE_FLOAT64_C( -266.09)),
8924 simde_mm_set_pd(SIMDE_FLOAT64_C( 904.74), SIMDE_FLOAT64_C( 704.00)),
8925 simde_mm_set_pd(SIMDE_FLOAT64_C( -609.69), SIMDE_FLOAT64_C( -970.09)) },
8926 { simde_mm_set_pd(SIMDE_FLOAT64_C( -864.69), SIMDE_FLOAT64_C( -728.75)),
8927 simde_mm_set_pd(SIMDE_FLOAT64_C( -549.96), SIMDE_FLOAT64_C( 478.05)),
8928 simde_mm_set_pd(SIMDE_FLOAT64_C( -864.69), SIMDE_FLOAT64_C(-1206.80)) },
8929 { simde_mm_set_pd(SIMDE_FLOAT64_C( -607.45), SIMDE_FLOAT64_C( -593.32)),
8930 simde_mm_set_pd(SIMDE_FLOAT64_C( -648.70), SIMDE_FLOAT64_C( -195.04)),
8931 simde_mm_set_pd(SIMDE_FLOAT64_C( -607.45), SIMDE_FLOAT64_C( -398.28)) },
8932 { simde_mm_set_pd(SIMDE_FLOAT64_C( -442.58), SIMDE_FLOAT64_C( -296.11)),
8933 simde_mm_set_pd(SIMDE_FLOAT64_C( 195.46), SIMDE_FLOAT64_C( 287.25)),
8934 simde_mm_set_pd(SIMDE_FLOAT64_C( -442.58), SIMDE_FLOAT64_C( -583.37)) },
8935 { simde_mm_set_pd(SIMDE_FLOAT64_C( -930.71), SIMDE_FLOAT64_C( 996.22)),
8936 simde_mm_set_pd(SIMDE_FLOAT64_C( -786.74), SIMDE_FLOAT64_C( 77.74)),
8937 simde_mm_set_pd(SIMDE_FLOAT64_C( -930.71), SIMDE_FLOAT64_C( 918.47)) },
8938 { simde_mm_set_pd(SIMDE_FLOAT64_C( 702.75), SIMDE_FLOAT64_C( -28.87)),
8939 simde_mm_set_pd(SIMDE_FLOAT64_C( 970.37), SIMDE_FLOAT64_C( -443.97)),
8940 simde_mm_set_pd(SIMDE_FLOAT64_C( 702.75), SIMDE_FLOAT64_C( 415.10)) },
8941 { simde_mm_set_pd(SIMDE_FLOAT64_C( -369.79), SIMDE_FLOAT64_C( 539.64)),
8942 simde_mm_set_pd(SIMDE_FLOAT64_C( -404.57), SIMDE_FLOAT64_C( -587.93)),
8943 simde_mm_set_pd(SIMDE_FLOAT64_C( -369.79), SIMDE_FLOAT64_C( 1127.56)) }
8944 };
8945
8946 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8947 simde__m128d r = simde_mm_sub_sd(test_vec[i].a, test_vec[i].b);
8948 simde_assert_m128d_close(r, test_vec[i].r, 1);
8949 }
8950
8951 return 0;
8952 }
8953
8954 static int
8955 test_simde_mm_sub_si64(SIMDE_MUNIT_TEST_ARGS) {
8956 const struct {
8957 simde__m64 a;
8958 simde__m64 b;
8959 simde__m64 r;
8960 } test_vec[8] = {
8961 { simde_mm_cvtsi64_m64(INT64_C( -40015113898169895)),
8962 simde_mm_cvtsi64_m64(INT64_C( 3843942487505240466)),
8963 simde_mm_cvtsi64_m64(INT64_C(-3883957601403410361)), },
8964 { simde_mm_cvtsi64_m64(INT64_C( 8317116700671824816)),
8965 simde_mm_cvtsi64_m64(INT64_C( 2891842609034633421)),
8966 simde_mm_cvtsi64_m64(INT64_C( 5425274091637191395)), },
8967 { simde_mm_cvtsi64_m64(INT64_C( 922042182678065366)),
8968 simde_mm_cvtsi64_m64(INT64_C( 4937799652981992213)),
8969 simde_mm_cvtsi64_m64(INT64_C(-4015757470303926847)), },
8970 { simde_mm_cvtsi64_m64(INT64_C( 297526191920431793)),
8971 simde_mm_cvtsi64_m64(INT64_C(-8568639315346032946)),
8972 simde_mm_cvtsi64_m64(INT64_C( 8866165507266464739)), },
8973 { simde_mm_cvtsi64_m64(INT64_C( 944913740190663659)),
8974 simde_mm_cvtsi64_m64(INT64_C(-5569388163200780530)),
8975 simde_mm_cvtsi64_m64(INT64_C( 6514301903391444189)), },
8976 { simde_mm_cvtsi64_m64(INT64_C( 2756927115722410076)),
8977 simde_mm_cvtsi64_m64(INT64_C( 1302679549898517242)),
8978 simde_mm_cvtsi64_m64(INT64_C( 1454247565823892834)), },
8979 { simde_mm_cvtsi64_m64(INT64_C( 977005230827305840)),
8980 simde_mm_cvtsi64_m64(INT64_C( 4908563834369883454)),
8981 simde_mm_cvtsi64_m64(INT64_C(-3931558603542577614)), },
8982 { simde_mm_cvtsi64_m64(INT64_C(-7062092201406124762)),
8983 simde_mm_cvtsi64_m64(INT64_C( 2377066878085823882)),
8984 simde_mm_cvtsi64_m64(INT64_C( 9007584994217602972)), }
8985 };
8986
8987 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8988 simde__m64 r = simde_mm_sub_si64(test_vec[i].a, test_vec[i].b);
8989 simde_assert_m64_i64(r, ==, test_vec[i].r);
8990 }
8991
8992 return 0;
8993 }
8994
8995 static int
8996 test_simde_mm_subs_epi8(SIMDE_MUNIT_TEST_ARGS) {
8997 const struct {
8998 simde__m128i a;
8999 simde__m128i b;
9000 simde__m128i r;
9001 } test_vec[8] = {
9002 { simde_mm_set_epi8(INT8_C( 77), INT8_C( -15), INT8_C( -74), INT8_C( 52), INT8_C( -61), INT8_C( 64), INT8_C( 59), INT8_C( 21),
9003 INT8_C( -60), INT8_C( -67), INT8_C( -73), INT8_C( 104), INT8_C( 30), INT8_C( 107), INT8_C( 83), INT8_C(-117)),
9004 simde_mm_set_epi8(INT8_C(-110), INT8_C(-112), INT8_C( -53), INT8_C( 89), INT8_C( 74), INT8_C( 81), INT8_C( -22), INT8_C( -97),
9005 INT8_C(-128), INT8_C( -54), INT8_C( 101), INT8_C( 96), INT8_C( 36), INT8_C( 87), INT8_C(-125), INT8_C( 28)),
9006 simde_mm_set_epi8(INT8_C( 127), INT8_C( 97), INT8_C( -21), INT8_C( -37), INT8_C(-128), INT8_C( -17), INT8_C( 81), INT8_C( 118),
9007 INT8_C( 68), INT8_C( -13), INT8_C(-128), INT8_C( 8), INT8_C( -6), INT8_C( 20), INT8_C( 127), INT8_C(-128)) },
9008 { simde_mm_set_epi8(INT8_C( 57), INT8_C( 79), INT8_C( 101), INT8_C( 47), INT8_C( 60), INT8_C( 12), INT8_C( 0), INT8_C( -19),
9009 INT8_C( 63), INT8_C( 39), INT8_C(-108), INT8_C( 37), INT8_C( 92), INT8_C( 114), INT8_C(-110), INT8_C( 91)),
9010 simde_mm_set_epi8(INT8_C( -59), INT8_C( -81), INT8_C( 49), INT8_C( 126), INT8_C( 33), INT8_C( 120), INT8_C(-127), INT8_C( 80),
9011 INT8_C( 109), INT8_C(-100), INT8_C( 21), INT8_C(-125), INT8_C( 7), INT8_C( 60), INT8_C(-122), INT8_C( -61)),
9012 simde_mm_set_epi8(INT8_C( 116), INT8_C( 127), INT8_C( 52), INT8_C( -79), INT8_C( 27), INT8_C(-108), INT8_C( 127), INT8_C( -99),
9013 INT8_C( -46), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 85), INT8_C( 54), INT8_C( 12), INT8_C( 127)) },
9014 { simde_mm_set_epi8(INT8_C( 84), INT8_C(-104), INT8_C( -82), INT8_C( 105), INT8_C( -43), INT8_C( -36), INT8_C( 16), INT8_C( -15),
9015 INT8_C( -34), INT8_C( 120), INT8_C(-110), INT8_C( 90), INT8_C( 78), INT8_C( 45), INT8_C(-124), INT8_C( -84)),
9016 simde_mm_set_epi8(INT8_C( -66), INT8_C( -1), INT8_C( 91), INT8_C( 74), INT8_C( 83), INT8_C( -91), INT8_C( -97), INT8_C( 115),
9017 INT8_C( -29), INT8_C( 67), INT8_C( -98), INT8_C( -51), INT8_C( 110), INT8_C( -43), INT8_C( 125), INT8_C( 63)),
9018 simde_mm_set_epi8(INT8_C( 127), INT8_C(-103), INT8_C(-128), INT8_C( 31), INT8_C(-126), INT8_C( 55), INT8_C( 113), INT8_C(-128),
9019 INT8_C( -5), INT8_C( 53), INT8_C( -12), INT8_C( 127), INT8_C( -32), INT8_C( 88), INT8_C(-128), INT8_C(-128)) },
9020 { simde_mm_set_epi8(INT8_C( -75), INT8_C( 37), INT8_C( 126), INT8_C( 21), INT8_C( 92), INT8_C(-124), INT8_C( -81), INT8_C( -6),
9021 INT8_C(-117), INT8_C( -14), INT8_C( 38), INT8_C( -68), INT8_C( -45), INT8_C( 114), INT8_C( 32), INT8_C( -13)),
9022 simde_mm_set_epi8(INT8_C( 80), INT8_C(-123), INT8_C( -25), INT8_C( 71), INT8_C(-108), INT8_C( -31), INT8_C( 98), INT8_C( -67),
9023 INT8_C( -23), INT8_C(-112), INT8_C( -42), INT8_C( -16), INT8_C( -56), INT8_C( 107), INT8_C( 6), INT8_C( 16)),
9024 simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( -50), INT8_C( 127), INT8_C( -93), INT8_C(-128), INT8_C( 61),
9025 INT8_C( -94), INT8_C( 98), INT8_C( 80), INT8_C( -52), INT8_C( 11), INT8_C( 7), INT8_C( 26), INT8_C( -29)) },
9026 { simde_mm_set_epi8(INT8_C( 85), INT8_C( 18), INT8_C( 15), INT8_C( 100), INT8_C( 107), INT8_C( -69), INT8_C( -96), INT8_C( -20),
9027 INT8_C( -18), INT8_C( 42), INT8_C( 98), INT8_C( 104), INT8_C( -70), INT8_C(-121), INT8_C( -91), INT8_C( 77)),
9028 simde_mm_set_epi8(INT8_C( 103), INT8_C( -62), INT8_C( 107), INT8_C(-125), INT8_C( -86), INT8_C(-112), INT8_C( -45), INT8_C( 3),
9029 INT8_C( -26), INT8_C( 96), INT8_C( 83), INT8_C( 23), INT8_C( 100), INT8_C( 127), INT8_C( -56), INT8_C( -52)),
9030 simde_mm_set_epi8(INT8_C( -18), INT8_C( 80), INT8_C( -92), INT8_C( 127), INT8_C( 127), INT8_C( 43), INT8_C( -51), INT8_C( -23),
9031 INT8_C( 8), INT8_C( -54), INT8_C( 15), INT8_C( 81), INT8_C(-128), INT8_C(-128), INT8_C( -35), INT8_C( 127)) },
9032 { simde_mm_set_epi8(INT8_C( 63), INT8_C( 16), INT8_C( 100), INT8_C( -10), INT8_C( 78), INT8_C( 116), INT8_C( -91), INT8_C( 21),
9033 INT8_C( -10), INT8_C( -27), INT8_C( -92), INT8_C( 31), INT8_C( -23), INT8_C( -53), INT8_C( -1), INT8_C( -1)),
9034 simde_mm_set_epi8(INT8_C( 20), INT8_C(-123), INT8_C( 36), INT8_C( -10), INT8_C( 127), INT8_C(-111), INT8_C( -60), INT8_C( 54),
9035 INT8_C( 92), INT8_C( 101), INT8_C( -13), INT8_C( -31), INT8_C(-124), INT8_C( 112), INT8_C(-118), INT8_C( -29)),
9036 simde_mm_set_epi8(INT8_C( 43), INT8_C( 127), INT8_C( 64), INT8_C( 0), INT8_C( -49), INT8_C( 127), INT8_C( -31), INT8_C( -33),
9037 INT8_C(-102), INT8_C(-128), INT8_C( -79), INT8_C( 62), INT8_C( 101), INT8_C(-128), INT8_C( 117), INT8_C( 28)) },
9038 { simde_mm_set_epi8(INT8_C( 1), INT8_C( -28), INT8_C( -45), INT8_C( -32), INT8_C(-103), INT8_C( 27), INT8_C( -38), INT8_C(-127),
9039 INT8_C( -89), INT8_C( -74), INT8_C( 47), INT8_C( 91), INT8_C( 46), INT8_C( -24), INT8_C( 60), INT8_C( 23)),
9040 simde_mm_set_epi8(INT8_C( -25), INT8_C( -68), INT8_C(-116), INT8_C( 92), INT8_C( 33), INT8_C( -5), INT8_C( -35), INT8_C( -44),
9041 INT8_C( -9), INT8_C( -90), INT8_C( 63), INT8_C( 108), INT8_C( 36), INT8_C( 27), INT8_C( 112), INT8_C( -11)),
9042 simde_mm_set_epi8(INT8_C( 26), INT8_C( 40), INT8_C( 71), INT8_C(-124), INT8_C(-128), INT8_C( 32), INT8_C( -3), INT8_C( -83),
9043 INT8_C( -80), INT8_C( 16), INT8_C( -16), INT8_C( -17), INT8_C( 10), INT8_C( -51), INT8_C( -52), INT8_C( 34)) },
9044 { simde_mm_set_epi8(INT8_C( 29), INT8_C( 123), INT8_C( -8), INT8_C( -35), INT8_C( 3), INT8_C( -97), INT8_C( 124), INT8_C(-121),
9045 INT8_C( 52), INT8_C( 75), INT8_C( -93), INT8_C(-127), INT8_C( -78), INT8_C( 87), INT8_C( 102), INT8_C( 119)),
9046 simde_mm_set_epi8(INT8_C( 51), INT8_C( -89), INT8_C( -6), INT8_C( 8), INT8_C( -19), INT8_C( -88), INT8_C( 22), INT8_C( 21),
9047 INT8_C( -37), INT8_C( -42), INT8_C( -97), INT8_C( 58), INT8_C( 70), INT8_C( -92), INT8_C(-100), INT8_C(-124)),
9048 simde_mm_set_epi8(INT8_C( -22), INT8_C( 127), INT8_C( -2), INT8_C( -43), INT8_C( 22), INT8_C( -9), INT8_C( 102), INT8_C(-128),
9049 INT8_C( 89), INT8_C( 117), INT8_C( 4), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127)) }
9050 };
9051
9052 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9053 simde__m128i r = simde_mm_subs_epi8(test_vec[i].a, test_vec[i].b);
9054 simde_assert_m128i_i8(r, ==, test_vec[i].r);
9055 }
9056
9057 return 0;
9058 }
9059
9060 static int
9061 test_simde_mm_subs_epi16(SIMDE_MUNIT_TEST_ARGS) {
9062 const struct {
9063 simde__m128i a;
9064 simde__m128i b;
9065 simde__m128i r;
9066 } test_vec[8] = {
9067 { simde_mm_set_epi16(INT16_C( 3087), INT16_C(-11046), INT16_C( 16009), INT16_C( -2784),
9068 INT16_C( 23836), INT16_C( 341), INT16_C( 25120), INT16_C( 792)),
9069 simde_mm_set_epi16(INT16_C(-13938), INT16_C( 11942), INT16_C( 18967), INT16_C(-24666),
9070 INT16_C(-12194), INT16_C(-15141), INT16_C( 27868), INT16_C( 7667)),
9071 simde_mm_set_epi16(INT16_C( 17025), INT16_C(-22988), INT16_C( -2958), INT16_C( 21882),
9072 INT16_C( 32767), INT16_C( 15482), INT16_C( -2748), INT16_C( -6875)) },
9073 { simde_mm_set_epi16(INT16_C( 15944), INT16_C( 21174), INT16_C(-19487), INT16_C( 30166),
9074 INT16_C( 9880), INT16_C( 2293), INT16_C( 1544), INT16_C( 6216)),
9075 simde_mm_set_epi16(INT16_C(-22637), INT16_C( 27460), INT16_C( 16112), INT16_C(-21899),
9076 INT16_C( 28784), INT16_C( -234), INT16_C( -5361), INT16_C( 25377)),
9077 simde_mm_set_epi16(INT16_C( 32767), INT16_C( -6286), INT16_C(-32768), INT16_C( 32767),
9078 INT16_C(-18904), INT16_C( 2527), INT16_C( 6905), INT16_C(-19161)) },
9079 { simde_mm_set_epi16(INT16_C( 25177), INT16_C( 16000), INT16_C(-30398), INT16_C(-17760),
9080 INT16_C( 16727), INT16_C( -4856), INT16_C(-10813), INT16_C( 11418)),
9081 simde_mm_set_epi16(INT16_C( 25832), INT16_C(-14964), INT16_C( 17267), INT16_C( -2360),
9082 INT16_C( 15960), INT16_C( 12601), INT16_C( 9707), INT16_C( 24108)),
9083 simde_mm_set_epi16(INT16_C( -655), INT16_C( 30964), INT16_C(-32768), INT16_C(-15400),
9084 INT16_C( 767), INT16_C(-17457), INT16_C(-20520), INT16_C(-12690)) },
9085 { simde_mm_set_epi16(INT16_C(-19601), INT16_C(-21914), INT16_C(-30623), INT16_C( -8160),
9086 INT16_C( 24427), INT16_C(-16073), INT16_C( 14239), INT16_C( 20391)),
9087 simde_mm_set_epi16(INT16_C(-19582), INT16_C(-27440), INT16_C( -9450), INT16_C(-25104),
9088 INT16_C( 11842), INT16_C( 4749), INT16_C( 3094), INT16_C( 19163)),
9089 simde_mm_set_epi16(INT16_C( -19), INT16_C( 5526), INT16_C(-21173), INT16_C( 16944),
9090 INT16_C( 12585), INT16_C(-20822), INT16_C( 11145), INT16_C( 1228)) },
9091 { simde_mm_set_epi16(INT16_C(-10118), INT16_C( 25388), INT16_C(-18110), INT16_C( -8312),
9092 INT16_C( 5249), INT16_C( 27800), INT16_C( 2023), INT16_C( 338)),
9093 simde_mm_set_epi16(INT16_C( 14501), INT16_C( 30804), INT16_C( 26885), INT16_C(-32444),
9094 INT16_C(-27012), INT16_C(-14925), INT16_C(-31013), INT16_C( 10807)),
9095 simde_mm_set_epi16(INT16_C(-24619), INT16_C( -5416), INT16_C(-32768), INT16_C( 24132),
9096 INT16_C( 32261), INT16_C( 32767), INT16_C( 32767), INT16_C(-10469)) },
9097 { simde_mm_set_epi16(INT16_C(-17246), INT16_C(-28624), INT16_C( 13423), INT16_C( 27394),
9098 INT16_C( 7877), INT16_C(-20368), INT16_C(-24205), INT16_C(-15569)),
9099 simde_mm_set_epi16(INT16_C(-21987), INT16_C( -4056), INT16_C( 2917), INT16_C( 23573),
9100 INT16_C( -2283), INT16_C( 21821), INT16_C( 32369), INT16_C( 26504)),
9101 simde_mm_set_epi16(INT16_C( 4741), INT16_C(-24568), INT16_C( 10506), INT16_C( 3821),
9102 INT16_C( 10160), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) },
9103 { simde_mm_set_epi16(INT16_C(-10290), INT16_C( 29918), INT16_C(-29258), INT16_C(-28749),
9104 INT16_C( 6048), INT16_C(-25677), INT16_C( 24207), INT16_C( 366)),
9105 simde_mm_set_epi16(INT16_C( 13339), INT16_C(-11229), INT16_C( 23811), INT16_C( -333),
9106 INT16_C(-29847), INT16_C( 21714), INT16_C( 2843), INT16_C( -2618)),
9107 simde_mm_set_epi16(INT16_C(-23629), INT16_C( 32767), INT16_C(-32768), INT16_C(-28416),
9108 INT16_C( 32767), INT16_C(-32768), INT16_C( 21364), INT16_C( 2984)) },
9109 { simde_mm_set_epi16(INT16_C( 824), INT16_C( 19299), INT16_C(-14246), INT16_C(-19942),
9110 INT16_C( 17549), INT16_C( 5220), INT16_C(-11590), INT16_C(-29570)),
9111 simde_mm_set_epi16(INT16_C( 30144), INT16_C(-11230), INT16_C(-24828), INT16_C( 29586),
9112 INT16_C( 29999), INT16_C( 25519), INT16_C( 5645), INT16_C( 16976)),
9113 simde_mm_set_epi16(INT16_C(-29320), INT16_C( 30529), INT16_C( 10582), INT16_C(-32768),
9114 INT16_C(-12450), INT16_C(-20299), INT16_C(-17235), INT16_C(-32768)) }
9115 };
9116
9117 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9118 simde__m128i r = simde_mm_subs_epi16(test_vec[i].a, test_vec[i].b);
9119 simde_assert_m128i_i16(r, ==, test_vec[i].r);
9120 }
9121
9122 return 0;
9123 }
9124
9125 static int
9126 test_simde_mm_subs_epu8(SIMDE_MUNIT_TEST_ARGS) {
9127 const struct {
9128 simde__m128i a;
9129 simde__m128i b;
9130 simde__m128i r;
9131 } test_vec[8] = {
9132 { simde_x_mm_set_epu8( 29, 76, 238, 61, 229, 243, 175, 238,
9133 75, 27, 166, 154, 166, 157, 121, 248),
9134 simde_x_mm_set_epu8(129, 19, 253, 149, 9, 247, 10, 249,
9135 105, 205, 179, 225, 124, 146, 91, 221),
9136 simde_x_mm_set_epu8( 0, 57, 0, 0, 220, 0, 165, 0,
9137 0, 0, 0, 0, 42, 11, 30, 27) },
9138 { simde_x_mm_set_epu8(101, 150, 221, 18, 105, 115, 165, 92,
9139 211, 64, 38, 72, 139, 6, 65, 201),
9140 simde_x_mm_set_epu8(124, 107, 110, 57, 116, 209, 153, 76,
9141 122, 56, 60, 234, 120, 132, 4, 95),
9142 simde_x_mm_set_epu8( 0, 43, 111, 0, 0, 0, 12, 16,
9143 89, 8, 0, 0, 19, 0, 61, 106) },
9144 { simde_x_mm_set_epu8(198, 232, 134, 13, 155, 189, 203, 84,
9145 209, 255, 163, 211, 57, 177, 19, 86),
9146 simde_x_mm_set_epu8(205, 92, 216, 169, 196, 192, 93, 101,
9147 208, 230, 232, 36, 70, 151, 125, 72),
9148 simde_x_mm_set_epu8( 0, 140, 0, 0, 0, 0, 110, 0,
9149 1, 25, 0, 175, 0, 26, 0, 14) },
9150 { simde_x_mm_set_epu8(150, 141, 253, 10, 218, 100, 243, 17,
9151 87, 99, 224, 222, 198, 181, 26, 41),
9152 simde_x_mm_set_epu8(221, 130, 146, 56, 57, 169, 46, 50,
9153 234, 43, 8, 172, 95, 74, 51, 101),
9154 simde_x_mm_set_epu8( 0, 11, 107, 0, 161, 0, 197, 0,
9155 0, 56, 216, 50, 103, 107, 0, 0) },
9156 { simde_x_mm_set_epu8( 91, 188, 127, 216, 55, 208, 83, 14,
9157 153, 114, 48, 224, 59, 66, 100, 10),
9158 simde_x_mm_set_epu8( 88, 28, 13, 17, 78, 38, 8, 111,
9159 57, 44, 184, 85, 188, 182, 235, 151),
9160 simde_x_mm_set_epu8( 3, 160, 114, 199, 0, 170, 75, 0,
9161 96, 70, 0, 139, 0, 0, 0, 0) },
9162 { simde_x_mm_set_epu8(116, 32, 155, 196, 56, 42, 17, 217,
9163 51, 162, 4, 4, 150, 83, 16, 147),
9164 simde_x_mm_set_epu8(216, 235, 181, 255, 89, 143, 40, 48,
9165 52, 24, 160, 9, 162, 223, 243, 117),
9166 simde_x_mm_set_epu8( 0, 0, 0, 0, 0, 0, 0, 169,
9167 0, 138, 0, 0, 0, 0, 0, 30) },
9168 { simde_x_mm_set_epu8(217, 238, 218, 168, 98, 146, 87, 217,
9169 135, 103, 179, 182, 128, 74, 156, 3),
9170 simde_x_mm_set_epu8(157, 0, 179, 231, 176, 37, 226, 198,
9171 145, 138, 239, 164, 0, 170, 52, 61),
9172 simde_x_mm_set_epu8( 60, 238, 39, 0, 0, 109, 0, 19,
9173 0, 0, 0, 18, 128, 0, 104, 0) },
9174 { simde_x_mm_set_epu8(181, 83, 160, 141, 77, 119, 160, 171,
9175 112, 95, 47, 88, 0, 90, 237, 18),
9176 simde_x_mm_set_epu8(139, 146, 25, 173, 34, 31, 251, 200,
9177 190, 131, 23, 41, 246, 91, 98, 221),
9178 simde_x_mm_set_epu8( 42, 0, 135, 0, 43, 88, 0, 0,
9179 0, 0, 24, 47, 0, 0, 139, 0) }
9180 };
9181
9182 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9183 simde__m128i r = simde_mm_subs_epu8(test_vec[i].a, test_vec[i].b);
9184 simde_assert_m128i_u8(r, ==, test_vec[i].r);
9185 }
9186
9187 return 0;
9188 }
9189
9190 static int
9191 test_simde_mm_subs_epu16(SIMDE_MUNIT_TEST_ARGS) {
9192 const struct {
9193 simde__m128i a;
9194 simde__m128i b;
9195 simde__m128i r;
9196 } test_vec[8] = {
9197 { simde_x_mm_set_epu16(55440, 59202, 42058, 53369, 32796, 7917, 33818, 17136),
9198 simde_x_mm_set_epu16(26104, 52689, 47050, 39249, 59785, 38246, 31610, 10518),
9199 simde_x_mm_set_epu16(29336, 6513, 0, 14120, 0, 0, 2208, 6618) },
9200 { simde_x_mm_set_epu16(34216, 34652, 60066, 36214, 4826, 65416, 55052, 33573),
9201 simde_x_mm_set_epu16(26443, 15803, 4000, 33420, 50076, 27556, 5522, 41665),
9202 simde_x_mm_set_epu16( 7773, 18849, 56066, 2794, 0, 37860, 49530, 0) },
9203 { simde_x_mm_set_epu16(64499, 21603, 35445, 16287, 15728, 23400, 23336, 39270),
9204 simde_x_mm_set_epu16(56255, 54924, 45249, 41636, 27152, 13319, 19428, 768),
9205 simde_x_mm_set_epu16( 8244, 0, 0, 0, 0, 10081, 3908, 38502) },
9206 { simde_x_mm_set_epu16( 1242, 22793, 21812, 57045, 22651, 26751, 59072, 30159),
9207 simde_x_mm_set_epu16(11521, 44413, 36849, 788, 57441, 54148, 2979, 46303),
9208 simde_x_mm_set_epu16( 0, 0, 0, 56257, 0, 0, 56093, 0) },
9209 { simde_x_mm_set_epu16(37620, 40488, 64998, 40075, 44204, 34122, 59592, 65445),
9210 simde_x_mm_set_epu16(40351, 64891, 27393, 62063, 1981, 56033, 30691, 62006),
9211 simde_x_mm_set_epu16( 0, 0, 37605, 0, 42223, 0, 28901, 3439) },
9212 { simde_x_mm_set_epu16(65230, 30209, 16765, 1470, 31101, 49860, 26882, 55440),
9213 simde_x_mm_set_epu16(49049, 44537, 10442, 42049, 271, 49034, 11746, 5994),
9214 simde_x_mm_set_epu16(16181, 0, 6323, 0, 30830, 826, 15136, 49446) },
9215 { simde_x_mm_set_epu16(37013, 9547, 22144, 27612, 32177, 62691, 50927, 50782),
9216 simde_x_mm_set_epu16(18153, 2530, 10375, 48140, 7056, 62459, 20700, 31971),
9217 simde_x_mm_set_epu16(18860, 7017, 11769, 0, 25121, 232, 30227, 18811) },
9218 { simde_x_mm_set_epu16( 9831, 28967, 28080, 17470, 59616, 18625, 64250, 31724),
9219 simde_x_mm_set_epu16(52094, 35298, 55420, 3659, 42707, 55727, 29250, 17787),
9220 simde_x_mm_set_epu16( 0, 0, 0, 13811, 16909, 0, 35000, 13937) }
9221 };
9222
9223 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9224 simde__m128i r = simde_mm_subs_epu16(test_vec[i].a, test_vec[i].b);
9225 simde_assert_m128i_u16(r, ==, test_vec[i].r);
9226 }
9227
9228 return 0;
9229 }
9230
9231 static int
9232 test_simde_mm_ucomieq_sd(SIMDE_MUNIT_TEST_ARGS) {
9233 const struct {
9234 simde__m128d a;
9235 simde__m128d b;
9236 int r;
9237 } test_vec[8] = {
9238 { simde_mm_set_pd(SIMDE_FLOAT64_C( 523.45), SIMDE_FLOAT64_C( -718.90)),
9239 simde_mm_set_pd(SIMDE_FLOAT64_C( 39.72), SIMDE_FLOAT64_C( 184.39)),
9240 0 },
9241 { simde_mm_set_pd(SIMDE_FLOAT64_C( 666.01), SIMDE_FLOAT64_C( -592.10)),
9242 simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)),
9243 1 },
9244 { simde_mm_set_pd(SIMDE_FLOAT64_C( 840.01), SIMDE_FLOAT64_C( -550.36)),
9245 simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)),
9246 0 },
9247 { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C( 791.25)),
9248 simde_mm_set_pd(SIMDE_FLOAT64_C( 791.25), SIMDE_FLOAT64_C( 791.25)),
9249 1 },
9250 { simde_mm_set_pd(SIMDE_FLOAT64_C( 743.24), SIMDE_FLOAT64_C( 945.47)),
9251 simde_mm_set_pd(SIMDE_FLOAT64_C( 945.47), SIMDE_FLOAT64_C( 844.58)),
9252 0 },
9253 { simde_mm_set_pd(SIMDE_FLOAT64_C( 938.39), SIMDE_FLOAT64_C( -590.62)),
9254 simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)),
9255 0 },
9256 { simde_mm_set_pd(SIMDE_FLOAT64_C( 876.49), SIMDE_FLOAT64_C( 503.26)),
9257 simde_mm_set_pd(SIMDE_FLOAT64_C( 503.26), SIMDE_FLOAT64_C( 503.26)),
9258 1 },
9259 { simde_mm_set_pd(SIMDE_FLOAT64_C( 927.98), SIMDE_FLOAT64_C( -197.60)),
9260 simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)),
9261 1 }
9262 };
9263
9264 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9265 int r = simde_mm_ucomieq_sd(test_vec[i].a, test_vec[i].b);
9266 simde_assert_equal_i(r, test_vec[i].r);
9267 }
9268
9269 return 0;
9270 }
9271
9272 static int
9273 test_simde_mm_ucomige_sd(SIMDE_MUNIT_TEST_ARGS) {
9274 const struct {
9275 simde__m128d a;
9276 simde__m128d b;
9277 int r;
9278 } test_vec[8] = {
9279 { simde_mm_set_pd(SIMDE_FLOAT64_C( 214.53), SIMDE_FLOAT64_C( 606.90)),
9280 simde_mm_set_pd(SIMDE_FLOAT64_C( 814.33), SIMDE_FLOAT64_C( 606.90)),
9281 1 },
9282 { simde_mm_set_pd(SIMDE_FLOAT64_C( -487.58), SIMDE_FLOAT64_C( 444.56)),
9283 simde_mm_set_pd(SIMDE_FLOAT64_C( -781.36), SIMDE_FLOAT64_C( 30.46)),
9284 1 },
9285 { simde_mm_set_pd(SIMDE_FLOAT64_C( 605.28), SIMDE_FLOAT64_C( -943.32)),
9286 simde_mm_set_pd(SIMDE_FLOAT64_C( -943.32), SIMDE_FLOAT64_C( -943.32)),
9287 1 },
9288 { simde_mm_set_pd(SIMDE_FLOAT64_C( -981.47), SIMDE_FLOAT64_C( 31.75)),
9289 simde_mm_set_pd(SIMDE_FLOAT64_C( 31.75), SIMDE_FLOAT64_C( 299.12)),
9290 0 },
9291 { simde_mm_set_pd(SIMDE_FLOAT64_C( 480.83), SIMDE_FLOAT64_C( 255.57)),
9292 simde_mm_set_pd(SIMDE_FLOAT64_C( 946.90), SIMDE_FLOAT64_C( 608.16)),
9293 0 },
9294 { simde_mm_set_pd(SIMDE_FLOAT64_C( 634.58), SIMDE_FLOAT64_C( 320.38)),
9295 simde_mm_set_pd(SIMDE_FLOAT64_C( 320.38), SIMDE_FLOAT64_C( 942.24)),
9296 0 },
9297 { simde_mm_set_pd(SIMDE_FLOAT64_C( 98.67), SIMDE_FLOAT64_C( 118.05)),
9298 simde_mm_set_pd(SIMDE_FLOAT64_C( 118.05), SIMDE_FLOAT64_C( 118.05)),
9299 1 },
9300 { simde_mm_set_pd(SIMDE_FLOAT64_C( 544.57), SIMDE_FLOAT64_C( 783.14)),
9301 simde_mm_set_pd(SIMDE_FLOAT64_C( 636.80), SIMDE_FLOAT64_C( 783.14)),
9302 1 }
9303 };
9304
9305 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9306 int r = simde_mm_ucomige_sd(test_vec[i].a, test_vec[i].b);
9307 simde_assert_equal_i(r, test_vec[i].r);
9308 }
9309
9310 return 0;
9311 }
9312
9313 static int
9314 test_simde_mm_ucomigt_sd(SIMDE_MUNIT_TEST_ARGS) {
9315 const struct {
9316 simde__m128d a;
9317 simde__m128d b;
9318 int r;
9319 } test_vec[8] = {
9320 { simde_mm_set_pd(SIMDE_FLOAT64_C( 523.45), SIMDE_FLOAT64_C( -718.90)),
9321 simde_mm_set_pd(SIMDE_FLOAT64_C( 39.72), SIMDE_FLOAT64_C( 184.39)),
9322 0 },
9323 { simde_mm_set_pd(SIMDE_FLOAT64_C( 666.01), SIMDE_FLOAT64_C( -592.10)),
9324 simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)),
9325 0 },
9326 { simde_mm_set_pd(SIMDE_FLOAT64_C( 840.01), SIMDE_FLOAT64_C( -550.36)),
9327 simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)),
9328 1 },
9329 { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C( 791.25)),
9330 simde_mm_set_pd(SIMDE_FLOAT64_C( 791.25), SIMDE_FLOAT64_C( 791.25)),
9331 0 },
9332 { simde_mm_set_pd(SIMDE_FLOAT64_C( 743.24), SIMDE_FLOAT64_C( 945.47)),
9333 simde_mm_set_pd(SIMDE_FLOAT64_C( 945.47), SIMDE_FLOAT64_C( 844.58)),
9334 1 },
9335 { simde_mm_set_pd(SIMDE_FLOAT64_C( 938.39), SIMDE_FLOAT64_C( -590.62)),
9336 simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)),
9337 0 },
9338 { simde_mm_set_pd(SIMDE_FLOAT64_C( 876.49), SIMDE_FLOAT64_C( 503.26)),
9339 simde_mm_set_pd(SIMDE_FLOAT64_C( 503.26), SIMDE_FLOAT64_C( 503.26)),
9340 0 },
9341 { simde_mm_set_pd(SIMDE_FLOAT64_C( 927.98), SIMDE_FLOAT64_C( -197.60)),
9342 simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)),
9343 0 }
9344 };
9345
9346 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9347 int r = simde_mm_ucomigt_sd(test_vec[i].a, test_vec[i].b);
9348 simde_assert_equal_i(r, test_vec[i].r);
9349 }
9350
9351 return 0;
9352 }
9353
9354 static int
9355 test_simde_mm_ucomile_sd(SIMDE_MUNIT_TEST_ARGS) {
9356 const struct {
9357 simde__m128d a;
9358 simde__m128d b;
9359 int r;
9360 } test_vec[8] = {
9361 { simde_mm_set_pd(SIMDE_FLOAT64_C( 523.45), SIMDE_FLOAT64_C( -718.90)),
9362 simde_mm_set_pd(SIMDE_FLOAT64_C( 39.72), SIMDE_FLOAT64_C( 184.39)),
9363 1 },
9364 { simde_mm_set_pd(SIMDE_FLOAT64_C( 666.01), SIMDE_FLOAT64_C( -592.10)),
9365 simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)),
9366 1 },
9367 { simde_mm_set_pd(SIMDE_FLOAT64_C( 840.01), SIMDE_FLOAT64_C( -550.36)),
9368 simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)),
9369 0 },
9370 { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C( 791.25)),
9371 simde_mm_set_pd(SIMDE_FLOAT64_C( 791.25), SIMDE_FLOAT64_C( 791.25)),
9372 1 },
9373 { simde_mm_set_pd(SIMDE_FLOAT64_C( 743.24), SIMDE_FLOAT64_C( 945.47)),
9374 simde_mm_set_pd(SIMDE_FLOAT64_C( 945.47), SIMDE_FLOAT64_C( 844.58)),
9375 0 },
9376 { simde_mm_set_pd(SIMDE_FLOAT64_C( 938.39), SIMDE_FLOAT64_C( -590.62)),
9377 simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)),
9378 1 },
9379 { simde_mm_set_pd(SIMDE_FLOAT64_C( 876.49), SIMDE_FLOAT64_C( 503.26)),
9380 simde_mm_set_pd(SIMDE_FLOAT64_C( 503.26), SIMDE_FLOAT64_C( 503.26)),
9381 1 },
9382 { simde_mm_set_pd(SIMDE_FLOAT64_C( 927.98), SIMDE_FLOAT64_C( -197.60)),
9383 simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)),
9384 1 }
9385 };
9386
9387 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9388 int r = simde_mm_ucomile_sd(test_vec[i].a, test_vec[i].b);
9389 simde_assert_equal_i(r, test_vec[i].r);
9390 }
9391
9392 return 0;
9393 }
9394
9395 static int
9396 test_simde_mm_ucomilt_sd(SIMDE_MUNIT_TEST_ARGS) {
9397 const struct {
9398 simde__m128d a;
9399 simde__m128d b;
9400 int r;
9401 } test_vec[8] = {
9402 { simde_mm_set_pd(SIMDE_FLOAT64_C( 523.45), SIMDE_FLOAT64_C( -718.90)),
9403 simde_mm_set_pd(SIMDE_FLOAT64_C( 39.72), SIMDE_FLOAT64_C( 184.39)),
9404 1 },
9405 { simde_mm_set_pd(SIMDE_FLOAT64_C( 666.01), SIMDE_FLOAT64_C( -592.10)),
9406 simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)),
9407 0 },
9408 { simde_mm_set_pd(SIMDE_FLOAT64_C( 840.01), SIMDE_FLOAT64_C( -550.36)),
9409 simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)),
9410 0 },
9411 { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C( 791.25)),
9412 simde_mm_set_pd(SIMDE_FLOAT64_C( 791.25), SIMDE_FLOAT64_C( 791.25)),
9413 0 },
9414 { simde_mm_set_pd(SIMDE_FLOAT64_C( 743.24), SIMDE_FLOAT64_C( 945.47)),
9415 simde_mm_set_pd(SIMDE_FLOAT64_C( 945.47), SIMDE_FLOAT64_C( 844.58)),
9416 0 },
9417 { simde_mm_set_pd(SIMDE_FLOAT64_C( 938.39), SIMDE_FLOAT64_C( -590.62)),
9418 simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)),
9419 1 },
9420 { simde_mm_set_pd(SIMDE_FLOAT64_C( 876.49), SIMDE_FLOAT64_C( 503.26)),
9421 simde_mm_set_pd(SIMDE_FLOAT64_C( 503.26), SIMDE_FLOAT64_C( 503.26)),
9422 0 },
9423 { simde_mm_set_pd(SIMDE_FLOAT64_C( 927.98), SIMDE_FLOAT64_C( -197.60)),
9424 simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)),
9425 0 }
9426 };
9427
9428 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9429 int r = simde_mm_ucomilt_sd(test_vec[i].a, test_vec[i].b);
9430 simde_assert_equal_i(r, test_vec[i].r);
9431 }
9432
9433 return 0;
9434 }
9435
9436 static int
9437 test_simde_mm_undefined_pd(SIMDE_MUNIT_TEST_ARGS) {
9438 simde__m128d z = simde_mm_setzero_pd();
9439 simde__m128d v = simde_mm_undefined_pd();
9440 v = simde_mm_xor_pd(v, v);
9441
9442 simde_assert_m128d_equal(v, z);
9443
9444 return 0;
9445 }
9446
9447 static int
9448 test_simde_mm_undefined_si128(SIMDE_MUNIT_TEST_ARGS) {
9449 simde__m128i z = simde_mm_setzero_si128();
9450 simde__m128i v = simde_mm_undefined_si128();
9451 v = simde_mm_xor_si128(v, v);
9452
9453 simde_assert_m128i_equal(v, z);
9454
9455 return 0;
9456 }
9457
9458
9459 static int
9460 test_simde_mm_ucomineq_sd(SIMDE_MUNIT_TEST_ARGS) {
9461 const struct {
9462 simde__m128d a;
9463 simde__m128d b;
9464 int r;
9465 } test_vec[8] = {
9466 { simde_mm_set_pd(SIMDE_FLOAT64_C( 523.45), SIMDE_FLOAT64_C( -718.90)),
9467 simde_mm_set_pd(SIMDE_FLOAT64_C( 39.72), SIMDE_FLOAT64_C( 184.39)),
9468 1 },
9469 { simde_mm_set_pd(SIMDE_FLOAT64_C( 666.01), SIMDE_FLOAT64_C( -592.10)),
9470 simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)),
9471 0 },
9472 { simde_mm_set_pd(SIMDE_FLOAT64_C( 840.01), SIMDE_FLOAT64_C( -550.36)),
9473 simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)),
9474 1 },
9475 { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C( 791.25)),
9476 simde_mm_set_pd(SIMDE_FLOAT64_C( 791.25), SIMDE_FLOAT64_C( 791.25)),
9477 0 },
9478 { simde_mm_set_pd(SIMDE_FLOAT64_C( 743.24), SIMDE_FLOAT64_C( 945.47)),
9479 simde_mm_set_pd(SIMDE_FLOAT64_C( 945.47), SIMDE_FLOAT64_C( 844.58)),
9480 1 },
9481 { simde_mm_set_pd(SIMDE_FLOAT64_C( 938.39), SIMDE_FLOAT64_C( -590.62)),
9482 simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)),
9483 1 },
9484 { simde_mm_set_pd(SIMDE_FLOAT64_C( 876.49), SIMDE_FLOAT64_C( 503.26)),
9485 simde_mm_set_pd(SIMDE_FLOAT64_C( 503.26), SIMDE_FLOAT64_C( 503.26)),
9486 0 },
9487 { simde_mm_set_pd(SIMDE_FLOAT64_C( 927.98), SIMDE_FLOAT64_C( -197.60)),
9488 simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)),
9489 0 }
9490 };
9491
9492 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9493 int r = simde_mm_ucomineq_sd(test_vec[i].a, test_vec[i].b);
9494 simde_assert_equal_i(r, test_vec[i].r);
9495 }
9496
9497 return 0;
9498 }
9499
9500 static int
9501 test_simde_mm_unpackhi_epi8(SIMDE_MUNIT_TEST_ARGS) {
9502 const struct {
9503 simde__m128i a;
9504 simde__m128i b;
9505 simde__m128i r;
9506 } test_vec[8] = {
9507 { simde_mm_set_epi8(INT8_C( -36), INT8_C( 95), INT8_C( 84), INT8_C(-106), INT8_C( 32), INT8_C( 120), INT8_C( 19), INT8_C( -16),
9508 INT8_C( 46), INT8_C(-123), INT8_C(-117), INT8_C( 46), INT8_C( 18), INT8_C( -72), INT8_C( -36), INT8_C( 78)),
9509 simde_mm_set_epi8(INT8_C( 10), INT8_C(-106), INT8_C( -91), INT8_C( -62), INT8_C( -61), INT8_C( -62), INT8_C( -83), INT8_C( -45),
9510 INT8_C( -36), INT8_C( 17), INT8_C( 58), INT8_C(-114), INT8_C( 96), INT8_C(-102), INT8_C( -89), INT8_C( -20)),
9511 simde_mm_set_epi8(INT8_C( 10), INT8_C( -36), INT8_C(-106), INT8_C( 95), INT8_C( -91), INT8_C( 84), INT8_C( -62), INT8_C(-106),
9512 INT8_C( -61), INT8_C( 32), INT8_C( -62), INT8_C( 120), INT8_C( -83), INT8_C( 19), INT8_C( -45), INT8_C( -16)) },
9513 { simde_mm_set_epi8(INT8_C( -54), INT8_C(-123), INT8_C( 74), INT8_C( 62), INT8_C( 43), INT8_C( 85), INT8_C( -99), INT8_C( -95),
9514 INT8_C( -93), INT8_C( -92), INT8_C( 121), INT8_C( 82), INT8_C( 61), INT8_C(-110), INT8_C(-111), INT8_C( -40)),
9515 simde_mm_set_epi8(INT8_C( 61), INT8_C( -50), INT8_C( 88), INT8_C( -56), INT8_C( 14), INT8_C( -92), INT8_C(-109), INT8_C( -80),
9516 INT8_C( -22), INT8_C( -61), INT8_C(-108), INT8_C( 69), INT8_C( -82), INT8_C( 29), INT8_C( 38), INT8_C( -72)),
9517 simde_mm_set_epi8(INT8_C( 61), INT8_C( -54), INT8_C( -50), INT8_C(-123), INT8_C( 88), INT8_C( 74), INT8_C( -56), INT8_C( 62),
9518 INT8_C( 14), INT8_C( 43), INT8_C( -92), INT8_C( 85), INT8_C(-109), INT8_C( -99), INT8_C( -80), INT8_C( -95)) },
9519 { simde_mm_set_epi8(INT8_C(-103), INT8_C( -78), INT8_C( -94), INT8_C( -12), INT8_C( -31), INT8_C( -92), INT8_C( -17), INT8_C( 16),
9520 INT8_C(-122), INT8_C( 113), INT8_C( -48), INT8_C( -99), INT8_C( 32), INT8_C( -67), INT8_C( 124), INT8_C( 107)),
9521 simde_mm_set_epi8(INT8_C( 42), INT8_C( 65), INT8_C( -45), INT8_C( -19), INT8_C( -55), INT8_C( -49), INT8_C( -54), INT8_C( 56),
9522 INT8_C( -67), INT8_C( -54), INT8_C(-109), INT8_C( -80), INT8_C( -85), INT8_C( 96), INT8_C( -36), INT8_C( -69)),
9523 simde_mm_set_epi8(INT8_C( 42), INT8_C(-103), INT8_C( 65), INT8_C( -78), INT8_C( -45), INT8_C( -94), INT8_C( -19), INT8_C( -12),
9524 INT8_C( -55), INT8_C( -31), INT8_C( -49), INT8_C( -92), INT8_C( -54), INT8_C( -17), INT8_C( 56), INT8_C( 16)) },
9525 { simde_mm_set_epi8(INT8_C( -33), INT8_C( -6), INT8_C( -31), INT8_C( -33), INT8_C( -45), INT8_C( -71), INT8_C( 119), INT8_C( 79),
9526 INT8_C( 29), INT8_C( 8), INT8_C( -44), INT8_C( -42), INT8_C( 113), INT8_C( -23), INT8_C( 53), INT8_C(-118)),
9527 simde_mm_set_epi8(INT8_C( -4), INT8_C( -47), INT8_C( -67), INT8_C( 41), INT8_C( 84), INT8_C( 5), INT8_C( -24), INT8_C( 123),
9528 INT8_C( 102), INT8_C( -69), INT8_C( 66), INT8_C( 117), INT8_C(-128), INT8_C( 115), INT8_C( -2), INT8_C( -19)),
9529 simde_mm_set_epi8(INT8_C( -4), INT8_C( -33), INT8_C( -47), INT8_C( -6), INT8_C( -67), INT8_C( -31), INT8_C( 41), INT8_C( -33),
9530 INT8_C( 84), INT8_C( -45), INT8_C( 5), INT8_C( -71), INT8_C( -24), INT8_C( 119), INT8_C( 123), INT8_C( 79)) },
9531 { simde_mm_set_epi8(INT8_C(-100), INT8_C( -57), INT8_C( -5), INT8_C(-111), INT8_C( 124), INT8_C(-127), INT8_C( -90), INT8_C( -88),
9532 INT8_C( 23), INT8_C(-114), INT8_C( -41), INT8_C( -98), INT8_C( 73), INT8_C( 14), INT8_C( 5), INT8_C( 46)),
9533 simde_mm_set_epi8(INT8_C( 66), INT8_C(-115), INT8_C( -36), INT8_C( -25), INT8_C( -75), INT8_C(-124), INT8_C( 96), INT8_C( 16),
9534 INT8_C( 14), INT8_C( 103), INT8_C( -98), INT8_C(-105), INT8_C( -21), INT8_C( -89), INT8_C( -87), INT8_C( -43)),
9535 simde_mm_set_epi8(INT8_C( 66), INT8_C(-100), INT8_C(-115), INT8_C( -57), INT8_C( -36), INT8_C( -5), INT8_C( -25), INT8_C(-111),
9536 INT8_C( -75), INT8_C( 124), INT8_C(-124), INT8_C(-127), INT8_C( 96), INT8_C( -90), INT8_C( 16), INT8_C( -88)) },
9537 { simde_mm_set_epi8(INT8_C( -66), INT8_C( -23), INT8_C( -71), INT8_C( 103), INT8_C( 67), INT8_C( -33), INT8_C(-118), INT8_C( -19),
9538 INT8_C( 25), INT8_C( -53), INT8_C( 56), INT8_C( 16), INT8_C(-126), INT8_C( 121), INT8_C( 96), INT8_C(-121)),
9539 simde_mm_set_epi8(INT8_C( -16), INT8_C( 18), INT8_C( 55), INT8_C(-104), INT8_C(-120), INT8_C( 39), INT8_C( -14), INT8_C( 76),
9540 INT8_C( 39), INT8_C( 41), INT8_C( -81), INT8_C( -9), INT8_C( -56), INT8_C(-103), INT8_C( 3), INT8_C( -27)),
9541 simde_mm_set_epi8(INT8_C( -16), INT8_C( -66), INT8_C( 18), INT8_C( -23), INT8_C( 55), INT8_C( -71), INT8_C(-104), INT8_C( 103),
9542 INT8_C(-120), INT8_C( 67), INT8_C( 39), INT8_C( -33), INT8_C( -14), INT8_C(-118), INT8_C( 76), INT8_C( -19)) },
9543 { simde_mm_set_epi8(INT8_C( 114), INT8_C( -36), INT8_C( 60), INT8_C( -26), INT8_C( 24), INT8_C( -63), INT8_C( -29), INT8_C( 114),
9544 INT8_C( 74), INT8_C( -94), INT8_C( 33), INT8_C( -33), INT8_C( 38), INT8_C( 109), INT8_C( 31), INT8_C( -91)),
9545 simde_mm_set_epi8(INT8_C( -28), INT8_C( -92), INT8_C( 30), INT8_C(-101), INT8_C( -7), INT8_C( 1), INT8_C(-108), INT8_C( 29),
9546 INT8_C( 114), INT8_C( 44), INT8_C( -8), INT8_C(-107), INT8_C( -68), INT8_C( 90), INT8_C( 100), INT8_C( -37)),
9547 simde_mm_set_epi8(INT8_C( -28), INT8_C( 114), INT8_C( -92), INT8_C( -36), INT8_C( 30), INT8_C( 60), INT8_C(-101), INT8_C( -26),
9548 INT8_C( -7), INT8_C( 24), INT8_C( 1), INT8_C( -63), INT8_C(-108), INT8_C( -29), INT8_C( 29), INT8_C( 114)) },
9549 { simde_mm_set_epi8(INT8_C( 83), INT8_C( -32), INT8_C( -17), INT8_C( -35), INT8_C( 52), INT8_C( -64), INT8_C( 46), INT8_C( 89),
9550 INT8_C( -65), INT8_C( -27), INT8_C(-104), INT8_C( 5), INT8_C( 84), INT8_C( 41), INT8_C( 88), INT8_C( 34)),
9551 simde_mm_set_epi8(INT8_C( -95), INT8_C( 93), INT8_C(-118), INT8_C( -44), INT8_C( 65), INT8_C( 114), INT8_C( 28), INT8_C( -90),
9552 INT8_C( -85), INT8_C( 102), INT8_C( 78), INT8_C( -99), INT8_C(-120), INT8_C( 43), INT8_C( -56), INT8_C( 25)),
9553 simde_mm_set_epi8(INT8_C( -95), INT8_C( 83), INT8_C( 93), INT8_C( -32), INT8_C(-118), INT8_C( -17), INT8_C( -44), INT8_C( -35),
9554 INT8_C( 65), INT8_C( 52), INT8_C( 114), INT8_C( -64), INT8_C( 28), INT8_C( 46), INT8_C( -90), INT8_C( 89)) }
9555 };
9556
9557 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9558 simde__m128i r = simde_mm_unpackhi_epi8(test_vec[i].a, test_vec[i].b);
9559 simde_assert_m128i_i8(r, ==, test_vec[i].r);
9560 }
9561
9562 return 0;
9563 }
9564
9565 static int
9566 test_simde_mm_unpackhi_epi16(SIMDE_MUNIT_TEST_ARGS) {
9567 const struct {
9568 simde__m128i a;
9569 simde__m128i b;
9570 simde__m128i r;
9571 } test_vec[8] = {
9572 { simde_mm_set_epi16(INT16_C( 18787), INT16_C( 30957), INT16_C( 6745), INT16_C( 5288),
9573 INT16_C(-10333), INT16_C( 29461), INT16_C( 961), INT16_C(-14007)),
9574 simde_mm_set_epi16(INT16_C(-29691), INT16_C( 32561), INT16_C(-16442), INT16_C( -4659),
9575 INT16_C( 21222), INT16_C(-21527), INT16_C( 30610), INT16_C( 14168)),
9576 simde_mm_set_epi16(INT16_C(-29691), INT16_C( 18787), INT16_C( 32561), INT16_C( 30957),
9577 INT16_C(-16442), INT16_C( 6745), INT16_C( -4659), INT16_C( 5288)) },
9578 { simde_mm_set_epi16(INT16_C( 14241), INT16_C(-17353), INT16_C( 15871), INT16_C( 3653),
9579 INT16_C(-29200), INT16_C( -9979), INT16_C(-30607), INT16_C( 31741)),
9580 simde_mm_set_epi16(INT16_C( 16753), INT16_C( 10981), INT16_C( 24190), INT16_C( 25811),
9581 INT16_C( 6793), INT16_C( -6051), INT16_C( 1979), INT16_C(-14675)),
9582 simde_mm_set_epi16(INT16_C( 16753), INT16_C( 14241), INT16_C( 10981), INT16_C(-17353),
9583 INT16_C( 24190), INT16_C( 15871), INT16_C( 25811), INT16_C( 3653)) },
9584 { simde_mm_set_epi16(INT16_C( 24118), INT16_C( -7950), INT16_C( 8813), INT16_C( 23815),
9585 INT16_C(-12880), INT16_C( 22441), INT16_C(-31736), INT16_C( 28417)),
9586 simde_mm_set_epi16(INT16_C( -2535), INT16_C(-21518), INT16_C( 10955), INT16_C(-16484),
9587 INT16_C(-17119), INT16_C( 5667), INT16_C( 5018), INT16_C( -9313)),
9588 simde_mm_set_epi16(INT16_C( -2535), INT16_C( 24118), INT16_C(-21518), INT16_C( -7950),
9589 INT16_C( 10955), INT16_C( 8813), INT16_C(-16484), INT16_C( 23815)) },
9590 { simde_mm_set_epi16(INT16_C(-15717), INT16_C( 7765), INT16_C(-27156), INT16_C( 26721),
9591 INT16_C( -2021), INT16_C( -7166), INT16_C( 832), INT16_C( 3368)),
9592 simde_mm_set_epi16(INT16_C(-17604), INT16_C( -2433), INT16_C(-22343), INT16_C( -9047),
9593 INT16_C( -8009), INT16_C(-14884), INT16_C(-31015), INT16_C( 9072)),
9594 simde_mm_set_epi16(INT16_C(-17604), INT16_C(-15717), INT16_C( -2433), INT16_C( 7765),
9595 INT16_C(-22343), INT16_C(-27156), INT16_C( -9047), INT16_C( 26721)) },
9596 { simde_mm_set_epi16(INT16_C( 9613), INT16_C(-25734), INT16_C(-29111), INT16_C( -6271),
9597 INT16_C( 28183), INT16_C( 5627), INT16_C( 23471), INT16_C(-31640)),
9598 simde_mm_set_epi16(INT16_C( 17448), INT16_C(-17387), INT16_C( 12535), INT16_C( 19499),
9599 INT16_C( 11772), INT16_C( 2463), INT16_C( 20494), INT16_C( -6320)),
9600 simde_mm_set_epi16(INT16_C( 17448), INT16_C( 9613), INT16_C(-17387), INT16_C(-25734),
9601 INT16_C( 12535), INT16_C(-29111), INT16_C( 19499), INT16_C( -6271)) },
9602 { simde_mm_set_epi16(INT16_C(-23597), INT16_C(-19655), INT16_C(-17057), INT16_C( 18059),
9603 INT16_C( 9484), INT16_C( 5905), INT16_C( 26068), INT16_C( 7424)),
9604 simde_mm_set_epi16(INT16_C(-16983), INT16_C( -3720), INT16_C(-18613), INT16_C( 7615),
9605 INT16_C(-29369), INT16_C(-17019), INT16_C( 736), INT16_C( 23842)),
9606 simde_mm_set_epi16(INT16_C(-16983), INT16_C(-23597), INT16_C( -3720), INT16_C(-19655),
9607 INT16_C(-18613), INT16_C(-17057), INT16_C( 7615), INT16_C( 18059)) },
9608 { simde_mm_set_epi16(INT16_C( 10339), INT16_C( 5875), INT16_C(-28772), INT16_C( 4220),
9609 INT16_C( 31801), INT16_C( 29049), INT16_C( 31270), INT16_C(-18878)),
9610 simde_mm_set_epi16(INT16_C(-18888), INT16_C( 24242), INT16_C(-31726), INT16_C(-29025),
9611 INT16_C( 845), INT16_C( -8031), INT16_C( 4992), INT16_C( -3599)),
9612 simde_mm_set_epi16(INT16_C(-18888), INT16_C( 10339), INT16_C( 24242), INT16_C( 5875),
9613 INT16_C(-31726), INT16_C(-28772), INT16_C(-29025), INT16_C( 4220)) },
9614 { simde_mm_set_epi16(INT16_C(-14097), INT16_C( 31063), INT16_C(-25063), INT16_C( 16951),
9615 INT16_C(-20725), INT16_C( 5387), INT16_C( -3219), INT16_C(-20465)),
9616 simde_mm_set_epi16(INT16_C(-23465), INT16_C(-30434), INT16_C( 28479), INT16_C(-15276),
9617 INT16_C(-28694), INT16_C( -9228), INT16_C( 22420), INT16_C(-31453)),
9618 simde_mm_set_epi16(INT16_C(-23465), INT16_C(-14097), INT16_C(-30434), INT16_C( 31063),
9619 INT16_C( 28479), INT16_C(-25063), INT16_C(-15276), INT16_C( 16951)) }
9620 };
9621
9622 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9623 simde__m128i r = simde_mm_unpackhi_epi16(test_vec[i].a, test_vec[i].b);
9624 simde_assert_m128i_i16(r, ==, test_vec[i].r);
9625 }
9626
9627 return 0;
9628 }
9629
9630 static int
9631 test_simde_mm_unpackhi_epi32(SIMDE_MUNIT_TEST_ARGS) {
9632 const struct {
9633 simde__m128i a;
9634 simde__m128i b;
9635 simde__m128i r;
9636 } test_vec[8] = {
9637 { simde_mm_set_epi32(INT32_C( 168291084), INT32_C( 803222516), INT32_C(-2059191165), INT32_C( 156619127)),
9638 simde_mm_set_epi32(INT32_C( 1247164255), INT32_C(-1585504202), INT32_C( 81979034), INT32_C(-1257437380)),
9639 simde_mm_set_epi32(INT32_C( 1247164255), INT32_C( 168291084), INT32_C(-1585504202), INT32_C( 803222516)) },
9640 { simde_mm_set_epi32(INT32_C(-1229392695), INT32_C( -447420261), INT32_C( -26173961), INT32_C( 1549193795)),
9641 simde_mm_set_epi32(INT32_C(-1584985518), INT32_C(-1825626458), INT32_C( 1790250510), INT32_C( -280669042)),
9642 simde_mm_set_epi32(INT32_C(-1584985518), INT32_C(-1229392695), INT32_C(-1825626458), INT32_C( -447420261)) },
9643 { simde_mm_set_epi32(INT32_C( -648698663), INT32_C( 1485053046), INT32_C(-2125470397), INT32_C( 507664294)),
9644 simde_mm_set_epi32(INT32_C( -735759218), INT32_C( -710175418), INT32_C(-1695159870), INT32_C(-1167064304)),
9645 simde_mm_set_epi32(INT32_C( -735759218), INT32_C( -648698663), INT32_C( -710175418), INT32_C( 1485053046)) },
9646 { simde_mm_set_epi32(INT32_C( -103259786), INT32_C( -188357300), INT32_C( 452180145), INT32_C(-1396420115)),
9647 simde_mm_set_epi32(INT32_C( 1404727965), INT32_C( -804737565), INT32_C(-1054802326), INT32_C( 1642647928)),
9648 simde_mm_set_epi32(INT32_C( 1404727965), INT32_C( -103259786), INT32_C( -804737565), INT32_C( -188357300)) },
9649 { simde_mm_set_epi32(INT32_C( 1212827068), INT32_C( 1189440629), INT32_C(-1547155816), INT32_C( 1839063433)),
9650 simde_mm_set_epi32(INT32_C( 796540528), INT32_C( -982269468), INT32_C( -40316418), INT32_C( -430354120)),
9651 simde_mm_set_epi32(INT32_C( 796540528), INT32_C( 1212827068), INT32_C( -982269468), INT32_C( 1189440629)) },
9652 { simde_mm_set_epi32(INT32_C( 1356454008), INT32_C( -215878264), INT32_C(-1695191474), INT32_C( 378220333)),
9653 simde_mm_set_epi32(INT32_C( -864195447), INT32_C(-1443486627), INT32_C(-2133730470), INT32_C( 373467456)),
9654 simde_mm_set_epi32(INT32_C( -864195447), INT32_C( 1356454008), INT32_C(-1443486627), INT32_C( -215878264)) },
9655 { simde_mm_set_epi32(INT32_C( 764442598), INT32_C( 1720554406), INT32_C( 1938751418), INT32_C( 1005471402)),
9656 simde_mm_set_epi32(INT32_C( 883878116), INT32_C( 255422854), INT32_C( 583152961), INT32_C( -594123403)),
9657 simde_mm_set_epi32(INT32_C( 883878116), INT32_C( 764442598), INT32_C( 255422854), INT32_C( 1720554406)) },
9658 { simde_mm_set_epi32(INT32_C( -822423451), INT32_C( -180339328), INT32_C( -689601673), INT32_C(-1524838623)),
9659 simde_mm_set_epi32(INT32_C( -665157473), INT32_C(-2141208691), INT32_C(-1935796365), INT32_C( -482464349)),
9660 simde_mm_set_epi32(INT32_C( -665157473), INT32_C( -822423451), INT32_C(-2141208691), INT32_C( -180339328)) }
9661 };
9662
9663 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9664 simde__m128i r = simde_mm_unpackhi_epi32(test_vec[i].a, test_vec[i].b);
9665 simde_assert_m128i_i32(r, ==, test_vec[i].r);
9666 }
9667
9668 return 0;
9669 }
9670
9671 static int
9672 test_simde_mm_unpackhi_epi64(SIMDE_MUNIT_TEST_ARGS) {
9673 const struct {
9674 simde__m128i a;
9675 simde__m128i b;
9676 simde__m128i r;
9677 } test_vec[8] = {
9678 { simde_mm_set_epi64x(INT64_C( 722804702791611380), INT64_C(-8844158709730520713)),
9679 simde_mm_set_epi64x(INT64_C( 5356529690674667574), INT64_C( 352097273025201980)),
9680 simde_mm_set_epi64x(INT64_C( 5356529690674667574), INT64_C( 722804702791611380)) },
9681 { simde_mm_set_epi64x(INT64_C(-5280201415118755685), INT64_C( -112416304952585661)),
9682 simde_mm_set_epi64x(INT64_C(-6807460961974278490), INT64_C( 7689067396111619214)),
9683 simde_mm_set_epi64x(INT64_C(-6807460961974278490), INT64_C(-5280201415118755685)) },
9684 { simde_mm_set_epi64x(INT64_C(-2786139541058872202), INT64_C(-9128825843223472218)),
9685 simde_mm_set_epi64x(INT64_C(-3160061775455742650), INT64_C(-7280656200013708528)),
9686 simde_mm_set_epi64x(INT64_C(-3160061775455742650), INT64_C(-2786139541058872202)) },
9687 { simde_mm_set_epi64x(INT64_C( -443497399755348660), INT64_C( 1942098937574085101)),
9688 simde_mm_set_epi64x(INT64_C( 6033260672941862371), INT64_C(-4530341492272082568)),
9689 simde_mm_set_epi64x(INT64_C( 6033260672941862371), INT64_C( -443497399755348660)) },
9690 { simde_mm_set_epi64x(INT64_C( 5209052593953008757), INT64_C(-6644983629697130103)),
9691 simde_mm_set_epi64x(INT64_C( 3421115521011270116), INT64_C( -173157692937252552)),
9692 simde_mm_set_epi64x(INT64_C( 3421115521011270116), INT64_C( 5209052593953008757)) },
9693 { simde_mm_set_epi64x(INT64_C( 5825925606967211400), INT64_C(-7280791940909813971)),
9694 simde_mm_set_epi64x(INT64_C(-3711691179365620643), INT64_C(-9164302586755241664)),
9695 simde_mm_set_epi64x(INT64_C(-3711691179365620643), INT64_C( 5825925606967211400)) },
9696 { simde_mm_set_epi64x(INT64_C( 3283255959799829414), INT64_C( 8326873936389097130)),
9697 simde_mm_set_epi64x(INT64_C( 3796227602125517190), INT64_C( 2504622899761407349)),
9698 simde_mm_set_epi64x(INT64_C( 3796227602125517190), INT64_C( 3283255959799829414)) },
9699 { simde_mm_set_epi64x(INT64_C(-3532281821393830528), INT64_C(-2961816630031757535)),
9700 simde_mm_set_epi64x(INT64_C(-2856829591071244403), INT64_C(-8314182075578176093)),
9701 simde_mm_set_epi64x(INT64_C(-2856829591071244403), INT64_C(-3532281821393830528)) }
9702 };
9703
9704 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9705 simde__m128i r = simde_mm_unpackhi_epi64(test_vec[i].a, test_vec[i].b);
9706 simde_assert_m128i_i64(r, ==, test_vec[i].r);
9707 }
9708
9709 return 0;
9710 }
9711
9712 static int
9713 test_simde_mm_unpackhi_pd(SIMDE_MUNIT_TEST_ARGS) {
9714 const struct {
9715 simde__m128d a;
9716 simde__m128d b;
9717 simde__m128d r;
9718 } test_vec[8] = {
9719 { simde_mm_set_pd(SIMDE_FLOAT64_C( -788.38), SIMDE_FLOAT64_C( -23.22)),
9720 simde_mm_set_pd(SIMDE_FLOAT64_C( -996.21), SIMDE_FLOAT64_C( 645.47)),
9721 simde_mm_set_pd(SIMDE_FLOAT64_C( -996.21), SIMDE_FLOAT64_C( -788.38)) },
9722 { simde_mm_set_pd(SIMDE_FLOAT64_C( -986.13), SIMDE_FLOAT64_C( 267.77)),
9723 simde_mm_set_pd(SIMDE_FLOAT64_C( 401.03), SIMDE_FLOAT64_C( 978.53)),
9724 simde_mm_set_pd(SIMDE_FLOAT64_C( 401.03), SIMDE_FLOAT64_C( -986.13)) },
9725 { simde_mm_set_pd(SIMDE_FLOAT64_C( -468.32), SIMDE_FLOAT64_C( -478.73)),
9726 simde_mm_set_pd(SIMDE_FLOAT64_C( -484.79), SIMDE_FLOAT64_C( -613.68)),
9727 simde_mm_set_pd(SIMDE_FLOAT64_C( -484.79), SIMDE_FLOAT64_C( -468.32)) },
9728 { simde_mm_set_pd(SIMDE_FLOAT64_C( 169.21), SIMDE_FLOAT64_C( 897.06)),
9729 simde_mm_set_pd(SIMDE_FLOAT64_C( -872.63), SIMDE_FLOAT64_C( -172.69)),
9730 simde_mm_set_pd(SIMDE_FLOAT64_C( -872.63), SIMDE_FLOAT64_C( 169.21)) },
9731 { simde_mm_set_pd(SIMDE_FLOAT64_C( 499.02), SIMDE_FLOAT64_C( 28.99)),
9732 simde_mm_set_pd(SIMDE_FLOAT64_C( 532.77), SIMDE_FLOAT64_C( -718.79)),
9733 simde_mm_set_pd(SIMDE_FLOAT64_C( 532.77), SIMDE_FLOAT64_C( 499.02)) },
9734 { simde_mm_set_pd(SIMDE_FLOAT64_C( 208.34), SIMDE_FLOAT64_C( 635.19)),
9735 simde_mm_set_pd(SIMDE_FLOAT64_C( -165.40), SIMDE_FLOAT64_C( 391.08)),
9736 simde_mm_set_pd(SIMDE_FLOAT64_C( -165.40), SIMDE_FLOAT64_C( 208.34)) },
9737 { simde_mm_set_pd(SIMDE_FLOAT64_C( -371.80), SIMDE_FLOAT64_C( 698.49)),
9738 simde_mm_set_pd(SIMDE_FLOAT64_C( 603.26), SIMDE_FLOAT64_C( 962.25)),
9739 simde_mm_set_pd(SIMDE_FLOAT64_C( 603.26), SIMDE_FLOAT64_C( -371.80)) },
9740 { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.32), SIMDE_FLOAT64_C( 149.18)),
9741 simde_mm_set_pd(SIMDE_FLOAT64_C( 349.36), SIMDE_FLOAT64_C( -60.66)),
9742 simde_mm_set_pd(SIMDE_FLOAT64_C( 349.36), SIMDE_FLOAT64_C( -939.32)) }
9743 };
9744
9745 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9746 simde__m128d r = simde_mm_unpackhi_pd(test_vec[i].a, test_vec[i].b);
9747 simde_assert_m128d_close(r, test_vec[i].r, 1);
9748 }
9749
9750 return 0;
9751 }
9752
9753 static int
9754 test_simde_mm_unpacklo_epi8(SIMDE_MUNIT_TEST_ARGS) {
9755 const struct {
9756 simde__m128i a;
9757 simde__m128i b;
9758 simde__m128i r;
9759 } test_vec[8] = {
9760 { simde_mm_set_epi8(INT8_C( -46), INT8_C( 11), INT8_C( -95), INT8_C( -6), INT8_C(-108), INT8_C(-107), INT8_C( -24), INT8_C( -94),
9761 INT8_C( -54), INT8_C( 2), INT8_C( 111), INT8_C( 78), INT8_C( 16), INT8_C( -54), INT8_C( -31), INT8_C( -19)),
9762 simde_mm_set_epi8(INT8_C( 40), INT8_C( -29), INT8_C( -79), INT8_C( -49), INT8_C( 12), INT8_C( -63), INT8_C( 87), INT8_C( 55),
9763 INT8_C( 121), INT8_C( 100), INT8_C( -21), INT8_C( -2), INT8_C( -22), INT8_C( 29), INT8_C( 110), INT8_C(-110)),
9764 simde_mm_set_epi8(INT8_C( 121), INT8_C( -54), INT8_C( 100), INT8_C( 2), INT8_C( -21), INT8_C( 111), INT8_C( -2), INT8_C( 78),
9765 INT8_C( -22), INT8_C( 16), INT8_C( 29), INT8_C( -54), INT8_C( 110), INT8_C( -31), INT8_C(-110), INT8_C( -19)) },
9766 { simde_mm_set_epi8(INT8_C( 40), INT8_C( -52), INT8_C( -72), INT8_C( 9), INT8_C( -57), INT8_C( -62), INT8_C(-100), INT8_C( 119),
9767 INT8_C( 120), INT8_C( -83), INT8_C( 102), INT8_C( -39), INT8_C( -78), INT8_C( -92), INT8_C( -76), INT8_C( 121)),
9768 simde_mm_set_epi8(INT8_C( 7), INT8_C( -69), INT8_C(-112), INT8_C( 84), INT8_C( -8), INT8_C( 23), INT8_C( 71), INT8_C( -37),
9769 INT8_C( 104), INT8_C(-121), INT8_C( -93), INT8_C( 99), INT8_C( 47), INT8_C(-114), INT8_C( -52), INT8_C( 101)),
9770 simde_mm_set_epi8(INT8_C( 104), INT8_C( 120), INT8_C(-121), INT8_C( -83), INT8_C( -93), INT8_C( 102), INT8_C( 99), INT8_C( -39),
9771 INT8_C( 47), INT8_C( -78), INT8_C(-114), INT8_C( -92), INT8_C( -52), INT8_C( -76), INT8_C( 101), INT8_C( 121)) },
9772 { simde_mm_set_epi8(INT8_C( 23), INT8_C( 31), INT8_C( -95), INT8_C( -23), INT8_C( -83), INT8_C( 40), INT8_C( -32), INT8_C( -4),
9773 INT8_C( 97), INT8_C( 107), INT8_C(-118), INT8_C( 28), INT8_C( 58), INT8_C( -42), INT8_C( 6), INT8_C( 14)),
9774 simde_mm_set_epi8(INT8_C( 87), INT8_C( -63), INT8_C( 17), INT8_C( -66), INT8_C( -73), INT8_C( -52), INT8_C( 21), INT8_C( -51),
9775 INT8_C( 77), INT8_C( 127), INT8_C(-123), INT8_C( 35), INT8_C( -87), INT8_C( 10), INT8_C(-116), INT8_C( -15)),
9776 simde_mm_set_epi8(INT8_C( 77), INT8_C( 97), INT8_C( 127), INT8_C( 107), INT8_C(-123), INT8_C(-118), INT8_C( 35), INT8_C( 28),
9777 INT8_C( -87), INT8_C( 58), INT8_C( 10), INT8_C( -42), INT8_C(-116), INT8_C( 6), INT8_C( -15), INT8_C( 14)) },
9778 { simde_mm_set_epi8(INT8_C( 82), INT8_C( -82), INT8_C( 120), INT8_C(-117), INT8_C( 95), INT8_C( 34), INT8_C( 57), INT8_C(-126),
9779 INT8_C( 125), INT8_C( -41), INT8_C( 26), INT8_C( -67), INT8_C( -28), INT8_C( 110), INT8_C( 56), INT8_C( 8)),
9780 simde_mm_set_epi8(INT8_C( 43), INT8_C( 84), INT8_C( -22), INT8_C( -23), INT8_C(-118), INT8_C( 101), INT8_C( -61), INT8_C( 0),
9781 INT8_C( 102), INT8_C( 10), INT8_C( -14), INT8_C( -26), INT8_C( -16), INT8_C( -9), INT8_C(-102), INT8_C( -6)),
9782 simde_mm_set_epi8(INT8_C( 102), INT8_C( 125), INT8_C( 10), INT8_C( -41), INT8_C( -14), INT8_C( 26), INT8_C( -26), INT8_C( -67),
9783 INT8_C( -16), INT8_C( -28), INT8_C( -9), INT8_C( 110), INT8_C(-102), INT8_C( 56), INT8_C( -6), INT8_C( 8)) },
9784 { simde_mm_set_epi8(INT8_C( -53), INT8_C( -22), INT8_C( 64), INT8_C( -17), INT8_C( -84), INT8_C(-128), INT8_C(-124), INT8_C( -98),
9785 INT8_C( -10), INT8_C( -24), INT8_C( 47), INT8_C( 109), INT8_C( 15), INT8_C( -93), INT8_C( -3), INT8_C( -83)),
9786 simde_mm_set_epi8(INT8_C( 102), INT8_C( 24), INT8_C( 10), INT8_C( 77), INT8_C( -47), INT8_C( 121), INT8_C( -9), INT8_C( 31),
9787 INT8_C( 5), INT8_C( 32), INT8_C( -40), INT8_C( 72), INT8_C(-114), INT8_C( -28), INT8_C( 76), INT8_C( 98)),
9788 simde_mm_set_epi8(INT8_C( 5), INT8_C( -10), INT8_C( 32), INT8_C( -24), INT8_C( -40), INT8_C( 47), INT8_C( 72), INT8_C( 109),
9789 INT8_C(-114), INT8_C( 15), INT8_C( -28), INT8_C( -93), INT8_C( 76), INT8_C( -3), INT8_C( 98), INT8_C( -83)) },
9790 { simde_mm_set_epi8(INT8_C( 42), INT8_C(-126), INT8_C( -81), INT8_C( -3), INT8_C( 60), INT8_C( -79), INT8_C( 80), INT8_C( -92),
9791 INT8_C( -48), INT8_C( 40), INT8_C(-125), INT8_C( 24), INT8_C( 38), INT8_C( -84), INT8_C( 120), INT8_C( 92)),
9792 simde_mm_set_epi8(INT8_C(-118), INT8_C(-121), INT8_C( 29), INT8_C(-128), INT8_C(-101), INT8_C( 4), INT8_C( -66), INT8_C( 29),
9793 INT8_C( -3), INT8_C( 82), INT8_C( -7), INT8_C( -87), INT8_C( 76), INT8_C( 52), INT8_C(-124), INT8_C( 86)),
9794 simde_mm_set_epi8(INT8_C( -3), INT8_C( -48), INT8_C( 82), INT8_C( 40), INT8_C( -7), INT8_C(-125), INT8_C( -87), INT8_C( 24),
9795 INT8_C( 76), INT8_C( 38), INT8_C( 52), INT8_C( -84), INT8_C(-124), INT8_C( 120), INT8_C( 86), INT8_C( 92)) },
9796 { simde_mm_set_epi8(INT8_C(-121), INT8_C( 102), INT8_C( -71), INT8_C(-105), INT8_C(-120), INT8_C( 124), INT8_C( -56), INT8_C( 80),
9797 INT8_C( -23), INT8_C( 26), INT8_C(-103), INT8_C( 31), INT8_C( -30), INT8_C( -86), INT8_C( 103), INT8_C( -93)),
9798 simde_mm_set_epi8(INT8_C(-114), INT8_C( 9), INT8_C( 28), INT8_C( -23), INT8_C( 125), INT8_C( 28), INT8_C( -55), INT8_C( -13),
9799 INT8_C( -41), INT8_C( 123), INT8_C( -52), INT8_C( 49), INT8_C( -94), INT8_C( -66), INT8_C( 69), INT8_C( 75)),
9800 simde_mm_set_epi8(INT8_C( -41), INT8_C( -23), INT8_C( 123), INT8_C( 26), INT8_C( -52), INT8_C(-103), INT8_C( 49), INT8_C( 31),
9801 INT8_C( -94), INT8_C( -30), INT8_C( -66), INT8_C( -86), INT8_C( 69), INT8_C( 103), INT8_C( 75), INT8_C( -93)) },
9802 { simde_mm_set_epi8(INT8_C( -30), INT8_C( 56), INT8_C( -7), INT8_C( -85), INT8_C( -3), INT8_C( -30), INT8_C( 87), INT8_C( 101),
9803 INT8_C(-112), INT8_C( -18), INT8_C( 7), INT8_C( 45), INT8_C( 32), INT8_C( 103), INT8_C( -2), INT8_C( 100)),
9804 simde_mm_set_epi8(INT8_C( 75), INT8_C( -55), INT8_C( 1), INT8_C( 61), INT8_C(-126), INT8_C( -76), INT8_C( 61), INT8_C( -69),
9805 INT8_C( -86), INT8_C( 110), INT8_C( -52), INT8_C( 110), INT8_C( 96), INT8_C( -55), INT8_C( 76), INT8_C( 15)),
9806 simde_mm_set_epi8(INT8_C( -86), INT8_C(-112), INT8_C( 110), INT8_C( -18), INT8_C( -52), INT8_C( 7), INT8_C( 110), INT8_C( 45),
9807 INT8_C( 96), INT8_C( 32), INT8_C( -55), INT8_C( 103), INT8_C( 76), INT8_C( -2), INT8_C( 15), INT8_C( 100)) }
9808 };
9809
9810 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9811 simde__m128i r = simde_mm_unpacklo_epi8(test_vec[i].a, test_vec[i].b);
9812 simde_assert_m128i_i8(r, ==, test_vec[i].r);
9813 }
9814
9815 return 0;
9816 }
9817
9818 static int
9819 test_simde_mm_unpacklo_epi16(SIMDE_MUNIT_TEST_ARGS) {
9820 const struct {
9821 simde__m128i a;
9822 simde__m128i b;
9823 simde__m128i r;
9824 } test_vec[8] = {
9825 { simde_mm_set_epi16(INT16_C(-11765), INT16_C(-24070), INT16_C(-27499), INT16_C( -5982),
9826 INT16_C(-13822), INT16_C( 28494), INT16_C( 4298), INT16_C( -7699)),
9827 simde_mm_set_epi16(INT16_C( 10467), INT16_C(-20017), INT16_C( 3265), INT16_C( 22327),
9828 INT16_C( 31076), INT16_C( -5122), INT16_C( -5603), INT16_C( 28306)),
9829 simde_mm_set_epi16(INT16_C( 31076), INT16_C(-13822), INT16_C( -5122), INT16_C( 28494),
9830 INT16_C( -5603), INT16_C( 4298), INT16_C( 28306), INT16_C( -7699)) },
9831 { simde_mm_set_epi16(INT16_C( 10444), INT16_C(-18423), INT16_C(-14398), INT16_C(-25481),
9832 INT16_C( 30893), INT16_C( 26329), INT16_C(-19804), INT16_C(-19335)),
9833 simde_mm_set_epi16(INT16_C( 1979), INT16_C(-28588), INT16_C( -2025), INT16_C( 18395),
9834 INT16_C( 26759), INT16_C(-23709), INT16_C( 12174), INT16_C(-13211)),
9835 simde_mm_set_epi16(INT16_C( 26759), INT16_C( 30893), INT16_C(-23709), INT16_C( 26329),
9836 INT16_C( 12174), INT16_C(-19804), INT16_C(-13211), INT16_C(-19335)) },
9837 { simde_mm_set_epi16(INT16_C( 5919), INT16_C(-24087), INT16_C(-21208), INT16_C( -7940),
9838 INT16_C( 24939), INT16_C(-30180), INT16_C( 15062), INT16_C( 1550)),
9839 simde_mm_set_epi16(INT16_C( 22465), INT16_C( 4542), INT16_C(-18484), INT16_C( 5581),
9840 INT16_C( 19839), INT16_C(-31453), INT16_C(-22262), INT16_C(-29455)),
9841 simde_mm_set_epi16(INT16_C( 19839), INT16_C( 24939), INT16_C(-31453), INT16_C(-30180),
9842 INT16_C(-22262), INT16_C( 15062), INT16_C(-29455), INT16_C( 1550)) },
9843 { simde_mm_set_epi16(INT16_C( 21166), INT16_C( 30859), INT16_C( 24354), INT16_C( 14722),
9844 INT16_C( 32215), INT16_C( 6845), INT16_C( -7058), INT16_C( 14344)),
9845 simde_mm_set_epi16(INT16_C( 11092), INT16_C( -5399), INT16_C(-30107), INT16_C(-15616),
9846 INT16_C( 26122), INT16_C( -3354), INT16_C( -3849), INT16_C(-25862)),
9847 simde_mm_set_epi16(INT16_C( 26122), INT16_C( 32215), INT16_C( -3354), INT16_C( 6845),
9848 INT16_C( -3849), INT16_C( -7058), INT16_C(-25862), INT16_C( 14344)) },
9849 { simde_mm_set_epi16(INT16_C(-13334), INT16_C( 16623), INT16_C(-21376), INT16_C(-31586),
9850 INT16_C( -2328), INT16_C( 12141), INT16_C( 4003), INT16_C( -595)),
9851 simde_mm_set_epi16(INT16_C( 26136), INT16_C( 2637), INT16_C(-11911), INT16_C( -2273),
9852 INT16_C( 1312), INT16_C(-10168), INT16_C(-28956), INT16_C( 19554)),
9853 simde_mm_set_epi16(INT16_C( 1312), INT16_C( -2328), INT16_C(-10168), INT16_C( 12141),
9854 INT16_C(-28956), INT16_C( 4003), INT16_C( 19554), INT16_C( -595)) },
9855 { simde_mm_set_epi16(INT16_C( 10882), INT16_C(-20483), INT16_C( 15537), INT16_C( 20644),
9856 INT16_C(-12248), INT16_C(-31976), INT16_C( 9900), INT16_C( 30812)),
9857 simde_mm_set_epi16(INT16_C(-30073), INT16_C( 7552), INT16_C(-25852), INT16_C(-16867),
9858 INT16_C( -686), INT16_C( -1623), INT16_C( 19508), INT16_C(-31658)),
9859 simde_mm_set_epi16(INT16_C( -686), INT16_C(-12248), INT16_C( -1623), INT16_C(-31976),
9860 INT16_C( 19508), INT16_C( 9900), INT16_C(-31658), INT16_C( 30812)) },
9861 { simde_mm_set_epi16(INT16_C(-30874), INT16_C(-18025), INT16_C(-30596), INT16_C(-14256),
9862 INT16_C( -5862), INT16_C(-26337), INT16_C( -7510), INT16_C( 26531)),
9863 simde_mm_set_epi16(INT16_C(-29175), INT16_C( 7401), INT16_C( 32028), INT16_C(-13837),
9864 INT16_C(-10373), INT16_C(-13263), INT16_C(-23874), INT16_C( 17739)),
9865 simde_mm_set_epi16(INT16_C(-10373), INT16_C( -5862), INT16_C(-13263), INT16_C(-26337),
9866 INT16_C(-23874), INT16_C( -7510), INT16_C( 17739), INT16_C( 26531)) },
9867 { simde_mm_set_epi16(INT16_C( -7624), INT16_C( -1621), INT16_C( -542), INT16_C( 22373),
9868 INT16_C(-28434), INT16_C( 1837), INT16_C( 8295), INT16_C( -412)),
9869 simde_mm_set_epi16(INT16_C( 19401), INT16_C( 317), INT16_C(-32076), INT16_C( 15803),
9870 INT16_C(-21906), INT16_C(-13202), INT16_C( 24777), INT16_C( 19471)),
9871 simde_mm_set_epi16(INT16_C(-21906), INT16_C(-28434), INT16_C(-13202), INT16_C( 1837),
9872 INT16_C( 24777), INT16_C( 8295), INT16_C( 19471), INT16_C( -412)) }
9873 };
9874
9875 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9876 simde__m128i r = simde_mm_unpacklo_epi16(test_vec[i].a, test_vec[i].b);
9877 simde_assert_m128i_i16(r, ==, test_vec[i].r);
9878 }
9879
9880 return 0;
9881 }
9882
9883 static int
9884 test_simde_mm_unpacklo_epi32(SIMDE_MUNIT_TEST_ARGS) {
9885 const struct {
9886 simde__m128i a;
9887 simde__m128i b;
9888 simde__m128i r;
9889 } test_vec[8] = {
9890 { simde_mm_set_epi32(INT32_C( -770989574), INT32_C(-1802114910), INT32_C( -905810098), INT32_C( 281731565)),
9891 simde_mm_set_epi32(INT32_C( 686010831), INT32_C( 213997367), INT32_C( 2036657150), INT32_C( -367169902)),
9892 simde_mm_set_epi32(INT32_C( 2036657150), INT32_C( -905810098), INT32_C( -367169902), INT32_C( 281731565)) },
9893 { simde_mm_set_epi32(INT32_C( 684505097), INT32_C( -943547273), INT32_C( 2024629977), INT32_C(-1297828743)),
9894 simde_mm_set_epi32(INT32_C( 129732692), INT32_C( -132692005), INT32_C( 1753719651), INT32_C( 797887589)),
9895 simde_mm_set_epi32(INT32_C( 1753719651), INT32_C( 2024629977), INT32_C( 797887589), INT32_C(-1297828743)) },
9896 { simde_mm_set_epi32(INT32_C( 387949033), INT32_C(-1389829892), INT32_C( 1634437660), INT32_C( 987104782)),
9897 simde_mm_set_epi32(INT32_C( 1472270782), INT32_C(-1211361843), INT32_C( 1300202787), INT32_C(-1458926351)),
9898 simde_mm_set_epi32(INT32_C( 1300202787), INT32_C( 1634437660), INT32_C(-1458926351), INT32_C( 987104782)) },
9899 { simde_mm_set_epi32(INT32_C( 1387165835), INT32_C( 1596078466), INT32_C( 2111249085), INT32_C( -462538744)),
9900 simde_mm_set_epi32(INT32_C( 726985449), INT32_C(-1973042432), INT32_C( 1711993574), INT32_C( -252208390)),
9901 simde_mm_set_epi32(INT32_C( 1711993574), INT32_C( 2111249085), INT32_C( -252208390), INT32_C( -462538744)) },
9902 { simde_mm_set_epi32(INT32_C( -873840401), INT32_C(-1400863586), INT32_C( -152555667), INT32_C( 262405549)),
9903 simde_mm_set_epi32(INT32_C( 1712851533), INT32_C( -780536033), INT32_C( 86038600), INT32_C(-1897640862)),
9904 simde_mm_set_epi32(INT32_C( 86038600), INT32_C( -152555667), INT32_C(-1897640862), INT32_C( 262405549)) },
9905 { simde_mm_set_epi32(INT32_C( 713207805), INT32_C( 1018253476), INT32_C( -802651368), INT32_C( 648837212)),
9906 simde_mm_set_epi32(INT32_C(-1970856576), INT32_C(-1694188003), INT32_C( -44893783), INT32_C( 1278510166)),
9907 simde_mm_set_epi32(INT32_C( -44893783), INT32_C( -802651368), INT32_C( 1278510166), INT32_C( 648837212)) },
9908 { simde_mm_set_epi32(INT32_C(-2023310953), INT32_C(-2005088176), INT32_C( -384132833), INT32_C( -492148829)),
9909 simde_mm_set_epi32(INT32_C(-1912005399), INT32_C( 2099038707), INT32_C( -679752655), INT32_C(-1564588725)),
9910 simde_mm_set_epi32(INT32_C( -679752655), INT32_C( -384132833), INT32_C(-1564588725), INT32_C( -492148829)) },
9911 { simde_mm_set_epi32(INT32_C( -499582549), INT32_C( -35498139), INT32_C(-1863448787), INT32_C( 543686244)),
9912 simde_mm_set_epi32(INT32_C( 1271464253), INT32_C(-2102116933), INT32_C(-1435579282), INT32_C( 1623804943)),
9913 simde_mm_set_epi32(INT32_C(-1435579282), INT32_C(-1863448787), INT32_C( 1623804943), INT32_C( 543686244)) }
9914 };
9915
9916 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9917 simde__m128i r = simde_mm_unpacklo_epi32(test_vec[i].a, test_vec[i].b);
9918 simde_assert_m128i_i32(r, ==, test_vec[i].r);
9919 }
9920
9921 return 0;
9922 }
9923
9924 static int
9925 test_simde_mm_unpacklo_epi64(SIMDE_MUNIT_TEST_ARGS) {
9926 const struct {
9927 simde__m128i a;
9928 simde__m128i b;
9929 simde__m128i r;
9930 } test_vec[8] = {
9931 { simde_mm_set_epi64x(INT64_C(-3311375003394119518), INT64_C(-3890424747014823443)),
9932 simde_mm_set_epi64x(INT64_C( 2946394084060780343), INT64_C( 8747375856342363794)),
9933 simde_mm_set_epi64x(INT64_C( 8747375856342363794), INT64_C(-3890424747014823443)) },
9934 { simde_mm_set_epi64x(INT64_C( 2939927008911727735), INT64_C( 8695719540713370745)),
9935 simde_mm_set_epi64x(INT64_C( 557197673524316123), INT64_C( 7532168548195421285)),
9936 simde_mm_set_epi64x(INT64_C( 7532168548195421285), INT64_C( 8695719540713370745)) },
9937 { simde_mm_set_epi64x(INT64_C( 1666228412154962172), INT64_C( 7019856298037872142)),
9938 simde_mm_set_epi64x(INT64_C( 6323354862629950925), INT64_C( 5584328451169094897)),
9939 simde_mm_set_epi64x(INT64_C( 5584328451169094897), INT64_C( 7019856298037872142)) },
9940 { simde_mm_set_epi64x(INT64_C( 5957831897049610626), INT64_C( 9067745777617352712)),
9941 simde_mm_set_epi64x(INT64_C( 3122378730444800768), INT64_C( 7352956415334914810)),
9942 simde_mm_set_epi64x(INT64_C( 7352956415334914810), INT64_C( 9067745777617352712)) },
9943 { simde_mm_set_epi64x(INT64_C(-3753115941324421986), INT64_C( -655221600322060883)),
9944 simde_mm_set_epi64x(INT64_C( 7356641320652896031), INT64_C( 369532975590952034)),
9945 simde_mm_set_epi64x(INT64_C( 369532975590952034), INT64_C( -655221600322060883)) },
9946 { simde_mm_set_epi64x(INT64_C( 3063204198745198756), INT64_C(-3447361375000823716)),
9947 simde_mm_set_epi64x(INT64_C(-8464764536425759203), INT64_C( -192817328500210602)),
9948 simde_mm_set_epi64x(INT64_C( -192817328500210602), INT64_C(-3447361375000823716)) },
9949 { simde_mm_set_epi64x(INT64_C(-8690054370483713968), INT64_C(-1649837951252011101)),
9950 simde_mm_set_epi64x(INT64_C(-8212000656381392397), INT64_C(-2919515419863792309)),
9951 simde_mm_set_epi64x(INT64_C(-2919515419863792309), INT64_C(-1649837951252011101)) },
9952 { simde_mm_set_epi64x(INT64_C(-2145690705347848347), INT64_C(-8003451597392183708)),
9953 simde_mm_set_epi64x(INT64_C( 5460897386860920251), INT64_C(-6165766065381356529)),
9954 simde_mm_set_epi64x(INT64_C(-6165766065381356529), INT64_C(-8003451597392183708)) }
9955 };
9956
9957 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9958 simde__m128i r = simde_mm_unpacklo_epi64(test_vec[i].a, test_vec[i].b);
9959 simde_assert_m128i_i64(r, ==, test_vec[i].r);
9960 }
9961
9962 return 0;
9963 }
9964
9965 static int
9966 test_simde_mm_unpacklo_pd(SIMDE_MUNIT_TEST_ARGS) {
9967 const struct {
9968 simde__m128d a;
9969 simde__m128d b;
9970 simde__m128d r;
9971 } test_vec[8] = {
9972 { simde_mm_set_pd(SIMDE_FLOAT64_C( 160.82), SIMDE_FLOAT64_C( -868.81)),
9973 simde_mm_set_pd(SIMDE_FLOAT64_C( 640.98), SIMDE_FLOAT64_C( 578.20)),
9974 simde_mm_set_pd(SIMDE_FLOAT64_C( 578.20), SIMDE_FLOAT64_C( -868.81)) },
9975 { simde_mm_set_pd(SIMDE_FLOAT64_C( -900.35), SIMDE_FLOAT64_C( 829.02)),
9976 simde_mm_set_pd(SIMDE_FLOAT64_C( -680.55), SIMDE_FLOAT64_C( -51.61)),
9977 simde_mm_set_pd(SIMDE_FLOAT64_C( -51.61), SIMDE_FLOAT64_C( 829.02)) },
9978 { simde_mm_set_pd(SIMDE_FLOAT64_C( 560.63), SIMDE_FLOAT64_C( 395.65)),
9979 simde_mm_set_pd(SIMDE_FLOAT64_C( -681.25), SIMDE_FLOAT64_C( -57.21)),
9980 simde_mm_set_pd(SIMDE_FLOAT64_C( -57.21), SIMDE_FLOAT64_C( 395.65)) },
9981 { simde_mm_set_pd(SIMDE_FLOAT64_C( 938.21), SIMDE_FLOAT64_C( -628.45)),
9982 simde_mm_set_pd(SIMDE_FLOAT64_C( -939.59), SIMDE_FLOAT64_C( -183.36)),
9983 simde_mm_set_pd(SIMDE_FLOAT64_C( -183.36), SIMDE_FLOAT64_C( -628.45)) },
9984 { simde_mm_set_pd(SIMDE_FLOAT64_C( 352.81), SIMDE_FLOAT64_C( -540.34)),
9985 simde_mm_set_pd(SIMDE_FLOAT64_C( -819.35), SIMDE_FLOAT64_C( -238.91)),
9986 simde_mm_set_pd(SIMDE_FLOAT64_C( -238.91), SIMDE_FLOAT64_C( -540.34)) },
9987 { simde_mm_set_pd(SIMDE_FLOAT64_C( 435.92), SIMDE_FLOAT64_C( 320.63)),
9988 simde_mm_set_pd(SIMDE_FLOAT64_C( -314.42), SIMDE_FLOAT64_C( -394.55)),
9989 simde_mm_set_pd(SIMDE_FLOAT64_C( -394.55), SIMDE_FLOAT64_C( 320.63)) },
9990 { simde_mm_set_pd(SIMDE_FLOAT64_C( -256.77), SIMDE_FLOAT64_C( 784.61)),
9991 simde_mm_set_pd(SIMDE_FLOAT64_C( -354.05), SIMDE_FLOAT64_C( -16.87)),
9992 simde_mm_set_pd(SIMDE_FLOAT64_C( -16.87), SIMDE_FLOAT64_C( 784.61)) },
9993 { simde_mm_set_pd(SIMDE_FLOAT64_C( 81.23), SIMDE_FLOAT64_C( 882.56)),
9994 simde_mm_set_pd(SIMDE_FLOAT64_C( -661.47), SIMDE_FLOAT64_C( -202.79)),
9995 simde_mm_set_pd(SIMDE_FLOAT64_C( -202.79), SIMDE_FLOAT64_C( 882.56)) }
9996 };
9997
9998 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9999 simde__m128d r = simde_mm_unpacklo_pd(test_vec[i].a, test_vec[i].b);
10000 simde_assert_m128d_close(r, test_vec[i].r, 1);
10001 }
10002
10003 return 0;
10004 }
10005
10006 static int
10007 test_simde_mm_xor_pd(SIMDE_MUNIT_TEST_ARGS) {
10008 simde__m128d
10009 all_set = simde_x_mm_setone_pd(),
10010 all_unset = simde_mm_setzero_pd();
10011
10012 simde_assert_m128d_equal(simde_mm_xor_pd(all_set, all_unset), all_set);
10013 simde_assert_m128d_equal(simde_mm_xor_pd(all_set, all_set), all_unset);
10014 simde_assert_m128d_equal(simde_mm_xor_pd(all_unset, all_unset), all_unset);
10015
10016 return 0;
10017 }
10018
10019 static int
10020 test_simde_mm_xor_si128(SIMDE_MUNIT_TEST_ARGS) {
10021 const struct {
10022 simde__m128i a;
10023 simde__m128i b;
10024 simde__m128i r;
10025 } test_vec[8] = {
10026 { simde_mm_set_epi32(INT32_C( 1623880239), INT32_C( 1318620160), INT32_C(-1283662193), INT32_C(-1453845482)),
10027 simde_mm_set_epi32(INT32_C(-1675083604), INT32_C( 603168286), INT32_C( 409798099), INT32_C( 632966287)),
10028 simde_mm_set_epi32(INT32_C( -52284797), INT32_C( 1835733534), INT32_C(-1424893092), INT32_C(-1931323239)) },
10029 { simde_mm_set_epi32(INT32_C( 1509092554), INT32_C( 1648495442), INT32_C(-1486316171), INT32_C( 868417203)),
10030 simde_mm_set_epi32(INT32_C( 1183220554), INT32_C(-1650741405), INT32_C(-1277877547), INT32_C( -793058853)),
10031 simde_mm_set_epi32(INT32_C( 527724416), INT32_C( -2513871), INT32_C( 347979680), INT32_C( -478666904)) },
10032 { simde_mm_set_epi32(INT32_C( 373711788), INT32_C(-1451210820), INT32_C( 1218370771), INT32_C( 1535794325)),
10033 simde_mm_set_epi32(INT32_C( -155546503), INT32_C(-2037105503), INT32_C( 1041195962), INT32_C(-1654529737)),
10034 simde_mm_set_epi32(INT32_C( -520294443), INT32_C( 789871389), INT32_C( 1989263209), INT32_C( -957629022)) },
10035 { simde_mm_set_epi32(INT32_C(-1223418601), INT32_C( 332961755), INT32_C( 688173092), INT32_C( 352304516)),
10036 simde_mm_set_epi32(INT32_C( -734452212), INT32_C( -791801405), INT32_C( 114386244), INT32_C( 996038140)),
10037 simde_mm_set_epi32(INT32_C( 1663908635), INT32_C(-1021934056), INT32_C( 802542944), INT32_C( 799139960)) },
10038 { simde_mm_set_epi32(INT32_C( 1204298996), INT32_C( 1777561493), INT32_C( 531158614), INT32_C(-1345218351)),
10039 simde_mm_set_epi32(INT32_C( 465699923), INT32_C(-1417149028), INT32_C(-1963684061), INT32_C( -837148929)),
10040 simde_mm_set_epi32(INT32_C( 1544167591), INT32_C(-1032099319), INT32_C(-1789109899), INT32_C( 1640728110)) },
10041 { simde_mm_set_epi32(INT32_C( 1401162168), INT32_C( -922039657), INT32_C( 1348044504), INT32_C( 1592606181)),
10042 simde_mm_set_epi32(INT32_C(-1635510345), INT32_C(-1462861610), INT32_C(-1206905626), INT32_C( -326154944)),
10043 simde_mm_set_epi32(INT32_C( -855630321), INT32_C( 1640254017), INT32_C( -397831618), INT32_C(-1302169435)) },
10044 { simde_mm_set_epi32(INT32_C( 882266138), INT32_C( 2140233068), INT32_C( -978476725), INT32_C( -962797184)),
10045 simde_mm_set_epi32(INT32_C( 1476434174), INT32_C( 732384170), INT32_C( 406886944), INT32_C( 1700501859)),
10046 simde_mm_set_epi32(INT32_C( 1821821156), INT32_C( 1412830918), INT32_C( -571655317), INT32_C(-1547208477)) },
10047 { simde_mm_set_epi32(INT32_C( 782585313), INT32_C( 1758933973), INT32_C(-1583302414), INT32_C(-1602193751)),
10048 simde_mm_set_epi32(INT32_C( 760188951), INT32_C( 624290102), INT32_C( 378021852), INT32_C(-1714147587)),
10049 simde_mm_set_epi32(INT32_C( 65723894), INT32_C( 1306712803), INT32_C(-1222074578), INT32_C( 961828948)) }
10050 };
10051
10052 for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
10053 simde__m128i r = simde_mm_xor_si128(test_vec[i].a, test_vec[i].b);
10054 simde_assert_m128i_i64(r, ==, test_vec[i].r);
10055 }
10056
10057 return 0;
10058 }
10059
10060 static int
10061 test_simde_x_mm_not_si128(SIMDE_MUNIT_TEST_ARGS) {
10062 const struct {
10063 simde__m128i a;
10064 simde__m128i r;
10065 } test_vec[8] = {
10066 { simde_mm_set_epi32(INT32_C( -817965525), INT32_C( 2140859656), INT32_C( 142941694), INT32_C(-1061432158)),
10067 simde_mm_set_epi32(INT32_C( 817965524), INT32_C(-2140859657), INT32_C( -142941695), INT32_C( 1061432157)) },
10068 { simde_mm_set_epi32(INT32_C( 1656377120), INT32_C( 1182756765), INT32_C( 499148047), INT32_C( 1939837842)),
10069 simde_mm_set_epi32(INT32_C(-1656377121), INT32_C(-1182756766), INT32_C( -499148048), INT32_C(-1939837843)) },
10070 { simde_mm_set_epi32(INT32_C(-1391390683), INT32_C( -880299242), INT32_C( 1262346433), INT32_C(-1162276292)),
10071 simde_mm_set_epi32(INT32_C( 1391390682), INT32_C( 880299241), INT32_C(-1262346434), INT32_C( 1162276291)) },
10072 { simde_mm_set_epi32(INT32_C( 402553699), INT32_C(-1406117325), INT32_C(-1620159472), INT32_C( 1950201834)),
10073 simde_mm_set_epi32(INT32_C( -402553700), INT32_C( 1406117324), INT32_C( 1620159471), INT32_C(-1950201835)) },
10074 { simde_mm_set_epi32(INT32_C( 1201512664), INT32_C( -722158977), INT32_C(-1427673018), INT32_C(-1348620069)),
10075 simde_mm_set_epi32(INT32_C(-1201512665), INT32_C( 722158976), INT32_C( 1427673017), INT32_C( 1348620068)) },
10076 { simde_mm_set_epi32(INT32_C( 2022239253), INT32_C( 336656978), INT32_C(-2043097029), INT32_C( 2060912582)),
10077 simde_mm_set_epi32(INT32_C(-2022239254), INT32_C( -336656979), INT32_C( 2043097028), INT32_C(-2060912583)) },
10078 { simde_mm_set_epi32(INT32_C(-1767401405), INT32_C( 988173440), INT32_C( 653493949), INT32_C( 1545873213)),
10079 simde_mm_set_epi32(INT32_C( 1767401404), INT32_C( -988173441), INT32_C( -653493950), INT32_C(-1545873214)) },
10080 { simde_mm_set_epi32(INT32_C( 164259681), INT32_C( 1625402133), INT32_C( 274817939), INT32_C( 1382941610)),
10081 simde_mm_set_epi32(INT32_C( -164259682), INT32_C(-1625402134), INT32_C( -274817940), INT32_C(-1382941611)) }
10082 };
10083
10084 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
10085 simde__m128i r = simde_x_mm_not_si128(test_vec[i].a);
10086 simde_assert_m128i_i32(r, ==, test_vec[i].r);
10087 }
10088
10089 return 0;
10090 }
10091
10092 static int
10093 test_simde_x_mm_mul_epi64(SIMDE_MUNIT_TEST_ARGS) {
10094 const struct {
10095 simde__m128i a;
10096 simde__m128i b;
10097 simde__m128i r;
10098 } test_vec[8] = {
10099 { simde_mm_set_epi64x(INT64_C(-6673265146157132667), INT64_C(-8036865195274064518)),
10100 simde_mm_set_epi64x(INT64_C( 4763310881806863840), INT64_C(-2953190602401137090)),
10101 simde_mm_set_epi64x(INT64_C( 804621865193403744), INT64_C( 7037306546512957324)) },
10102 { simde_mm_set_epi64x(INT64_C( 4912321112367014754), INT64_C( 5506077972841640415)),
10103 simde_mm_set_epi64x(INT64_C( 5790159379234202843), INT64_C(-7860297575342104977)),
10104 simde_mm_set_epi64x(INT64_C(-6503632121046397738), INT64_C(-1366099594229104207)) },
10105 { simde_mm_set_epi64x(INT64_C( 2749162021411530208), INT64_C( 408462426494202626)),
10106 simde_mm_set_epi64x(INT64_C( 8447492608754880299), INT64_C(-7046703966410124624)),
10107 simde_mm_set_epi64x(INT64_C(-4973831282761794400), INT64_C( 2599589224149726560)) },
10108 { simde_mm_set_epi64x(INT64_C( -88834185851708236), INT64_C(-8089393205327952234)),
10109 simde_mm_set_epi64x(INT64_C( 381269932343520540), INT64_C( 2138325983301945876)),
10110 simde_mm_set_epi64x(INT64_C(-7088569628310845520), INT64_C( 1233235991476166584)) },
10111 { simde_mm_set_epi64x(INT64_C( -822706701071313394), INT64_C(-2759012498076821456)),
10112 simde_mm_set_epi64x(INT64_C( 3465917358098376677), INT64_C(-7954598628423398790)),
10113 simde_mm_set_epi64x(INT64_C(-6698232051336684410), INT64_C(-6956668788971772192)) },
10114 { simde_mm_set_epi64x(INT64_C( 8188114688325369058), INT64_C(-5073366312523094897)),
10115 simde_mm_set_epi64x(INT64_C(-8915693716470801407), INT64_C( 9186903668894606147)),
10116 simde_mm_set_epi64x(INT64_C( 3677373050832155874), INT64_C(-2924803137816977811)) },
10117 { simde_mm_set_epi64x(INT64_C(-5966336380315033651), INT64_C( 8263120995643775133)),
10118 simde_mm_set_epi64x(INT64_C(-4262947749795433008), INT64_C(-8185205248719856231)),
10119 simde_mm_set_epi64x(INT64_C(-5369329972927887472), INT64_C(-4868166633591505195)) },
10120 { simde_mm_set_epi64x(INT64_C( 2800078338557512603), INT64_C(-7382248080413965284)),
10121 simde_mm_set_epi64x(INT64_C( -645055313537887494), INT64_C( 2018860835012845242)),
10122 simde_mm_set_epi64x(INT64_C( 3796538949364005726), INT64_C(-1962708987484978088)) }
10123 };
10124
10125 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
10126 simde__m128i r = simde_x_mm_mul_epi64(test_vec[i].a, test_vec[i].b);
10127 simde_assert_m128i_i64(r, ==, test_vec[i].r);
10128 }
10129
10130 return 0;
10131 }
10132
10133 static int
10134 test_simde_x_mm_sub_epu32(SIMDE_MUNIT_TEST_ARGS) {
10135 const struct {
10136 simde__m128i a;
10137 simde__m128i b;
10138 simde__m128i r;
10139 } test_vec[8] = {
10140 { simde_x_mm_set_epu32(UINT32_C( 591915169), UINT32_C(1162556909), UINT32_C(2711661198), UINT32_C( 649386420)),
10141 simde_x_mm_set_epu32(UINT32_C(2963858433), UINT32_C( 387638488), UINT32_C(4276033779), UINT32_C(2372843734)),
10142 simde_x_mm_set_epu32(UINT32_C(1923024032), UINT32_C( 774918421), UINT32_C(2730594715), UINT32_C(2571509982)) },
10143 { simde_x_mm_set_epu32(UINT32_C( 445936307), UINT32_C(1465838226), UINT32_C(3055798709), UINT32_C(2785403539)),
10144 simde_x_mm_set_epu32(UINT32_C(2376292101), UINT32_C(2800453656), UINT32_C(2012288479), UINT32_C(1067663469)),
10145 simde_x_mm_set_epu32(UINT32_C(2364611502), UINT32_C(2960351866), UINT32_C(1043510230), UINT32_C(1717740070)) },
10146 { simde_x_mm_set_epu32(UINT32_C( 766825118), UINT32_C(3689178364), UINT32_C(1309713860), UINT32_C(1635279642)),
10147 simde_x_mm_set_epu32(UINT32_C(1287494965), UINT32_C(3931214929), UINT32_C( 130800549), UINT32_C(1579059128)),
10148 simde_x_mm_set_epu32(UINT32_C(3774297449), UINT32_C(4052930731), UINT32_C(1178913311), UINT32_C( 56220514)) },
10149 { simde_x_mm_set_epu32(UINT32_C(1521150506), UINT32_C( 229274390), UINT32_C(2137370048), UINT32_C(1343959137)),
10150 simde_x_mm_set_epu32(UINT32_C( 919906837), UINT32_C(4230649021), UINT32_C(2105941239), UINT32_C(3460244161)),
10151 simde_x_mm_set_epu32(UINT32_C( 601243669), UINT32_C( 293592665), UINT32_C( 31428809), UINT32_C(2178682272)) },
10152 { simde_x_mm_set_epu32(UINT32_C(1275529272), UINT32_C(2231818861), UINT32_C(2063802469), UINT32_C(3732401863)),
10153 simde_x_mm_set_epu32(UINT32_C(2896374047), UINT32_C(1493829257), UINT32_C(2939390855), UINT32_C(1941911553)),
10154 simde_x_mm_set_epu32(UINT32_C(2674122521), UINT32_C( 737989604), UINT32_C(3419378910), UINT32_C(1790490310)) },
10155 { simde_x_mm_set_epu32(UINT32_C(3017205359), UINT32_C(2429422013), UINT32_C(3351841835), UINT32_C(2341203472)),
10156 simde_x_mm_set_epu32(UINT32_C(3000898366), UINT32_C(1136654732), UINT32_C(2535059098), UINT32_C( 90134778)),
10157 simde_x_mm_set_epu32(UINT32_C( 16306993), UINT32_C(1292767281), UINT32_C( 816782737), UINT32_C(2251068694)) },
10158 { simde_x_mm_set_epu32(UINT32_C( 71842021), UINT32_C(1910901245), UINT32_C( 252676465), UINT32_C(3861146107)),
10159 simde_x_mm_set_epu32(UINT32_C(4061170475), UINT32_C(3890236125), UINT32_C(1645686841), UINT32_C(3708385897)),
10160 simde_x_mm_set_epu32(UINT32_C( 305638842), UINT32_C(2315632416), UINT32_C(2901956920), UINT32_C( 152760210)) },
10161 { simde_x_mm_set_epu32(UINT32_C(1390785465), UINT32_C( 237201350), UINT32_C(3330556421), UINT32_C( 382557765)),
10162 simde_x_mm_set_epu32(UINT32_C( 919261037), UINT32_C(4138415457), UINT32_C( 812238579), UINT32_C( 103076353)),
10163 simde_x_mm_set_epu32(UINT32_C( 471524428), UINT32_C( 393753189), UINT32_C(2518317842), UINT32_C( 279481412)) }
10164 };
10165
10166 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
10167 simde__m128i r = simde_x_mm_sub_epu32(test_vec[i].a, test_vec[i].b);
10168 simde_assert_m128i_u32(r, ==, test_vec[i].r);
10169 }
10170
10171 return 0;
10172 }
10173
10174 static int
10175 test_simde_x_mm_mod_epi64(SIMDE_MUNIT_TEST_ARGS) {
10176 const struct {
10177 simde__m128i a;
10178 simde__m128i b;
10179 simde__m128i r;
10180 } test_vec[8] = {
10181 { simde_mm_set_epi64x(INT64_C(-8053187774363015445), INT64_C( 9050551738356525681)),
10182 simde_mm_set_epi64x(INT64_C(-5432362900125533563), INT64_C( 4656333831414330662)),
10183 simde_mm_set_epi64x(INT64_C(-2620824874237481882), INT64_C( 4394217906942195019)) },
10184 { simde_mm_set_epi64x(INT64_C( 3643434954039553447), INT64_C(-6234539097175065740)),
10185 simde_mm_set_epi64x(INT64_C(-1834126128625936904), INT64_C( 5974848154734978575)),
10186 simde_mm_set_epi64x(INT64_C( 1809308825413616543), INT64_C( -259690942440087165)) },
10187 { simde_mm_set_epi64x(INT64_C( 9161306297850640165), INT64_C(-8306180370740150176)),
10188 simde_mm_set_epi64x(INT64_C( 2055562205091916701), INT64_C(-6680168448646461201)),
10189 simde_mm_set_epi64x(INT64_C( 939057477482973361), INT64_C(-1626011922093688975)) },
10190 { simde_mm_set_epi64x(INT64_C(-8267679289606370918), INT64_C(-5928191487249150522)),
10191 simde_mm_set_epi64x(INT64_C( -367540592738432621), INT64_C(-9016984680455221058)),
10192 simde_mm_set_epi64x(INT64_C( -181786249360853256), INT64_C(-5928191487249150522)) },
10193 { simde_mm_set_epi64x(INT64_C(-5911217161035399691), INT64_C(-1038656028139092449)),
10194 simde_mm_set_epi64x(INT64_C( 170272479168034452), INT64_C( 6507756447489319344)),
10195 simde_mm_set_epi64x(INT64_C( -121952869322228323), INT64_C(-1038656028139092449)) },
10196 { simde_mm_set_epi64x(INT64_C(-8644627274378588029), INT64_C( 5613017538463476646)),
10197 simde_mm_set_epi64x(INT64_C(-8247421513208151154), INT64_C(-1150990985458942599)),
10198 simde_mm_set_epi64x(INT64_C( -397205761170436875), INT64_C( 1009053596627706250)) },
10199 { simde_mm_set_epi64x(INT64_C( 8688276933216716257), INT64_C( -409477294924409172)),
10200 simde_mm_set_epi64x(INT64_C( 7651480072460119172), INT64_C( 5980691967331237074)),
10201 simde_mm_set_epi64x(INT64_C( 1036796860756597085), INT64_C( -409477294924409172)) },
10202 { simde_mm_set_epi64x(INT64_C(-6308927419868714376), INT64_C( 6327163388033237975)),
10203 simde_mm_set_epi64x(INT64_C( 4310605020200368092), INT64_C( 1934689183910316990)),
10204 simde_mm_set_epi64x(INT64_C(-1998322399668346284), INT64_C( 523095836302287005)) }
10205 };
10206
10207 for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
10208 simde__m128i r = simde_x_mm_mod_epi64(test_vec[i].a, test_vec[i].b);
10209 simde_assert_m128i_i64(r, ==, test_vec[i].r);
10210 }
10211
10212 return 0;
10213 }
10214
/* Test registration table for this file.
 *
 * Each SIMDE_TEST_FUNC_LIST_ENTRY(name) names one test; the names line up
 * with the `test_simde_<name>` functions defined earlier in this file
 * (e.g. x_mm_mod_epi64 <-> test_simde_x_mm_mod_epi64, which exercises
 * simde_x_mm_mod_epi64).  The BEGIN/ENTRY/END macros are defined outside
 * this file; presumably they expand to the suite's test array -- confirm
 * against the test headers.
 *
 * Blank lines separate groups of related entries.  The listing is not
 * alphabetical; NOTE(review): check whether order matters to the runner
 * before re-sorting.  A test function that is not listed here is
 * presumably never run -- confirm. */
SIMDE_TEST_FUNC_LIST_BEGIN
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_pd1)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi64x)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi64x)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_epi8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_epi64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_setzero_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_setzero_si128)

SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_abs_pd)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_epi8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_epi64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_si64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_epi8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_epu8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_epu16)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_and_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_and_si128)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_andnot_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_andnot_si128)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_avg_epu8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_avg_epu16)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_bslli_si128)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_bsrli_si128)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_slli_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_slli_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_slli_epi64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srli_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srli_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srli_epi64)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_sra_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sra_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srai_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srai_epi32)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_pd1)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_si128)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeh_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_storel_epi64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_storel_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_storer_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_si128)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_si16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_si32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_si64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_store1_pd)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_si128)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_si32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_si64)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_epi8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_epi64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_si64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_epi8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_epu8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_epu16)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_epu8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_epu8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_sd)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_epu32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_su32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_mulhi_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_mulhi_epu16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_mullo_epi16)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_epi8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpneq_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpneq_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_epi8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnlt_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnlt_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmple_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmple_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnle_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnle_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_epi8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpngt_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpngt_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpge_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpge_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnge_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnge_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpord_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpord_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpunord_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpunord_sd)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_castpd_ps)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_castps_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_castsi128_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_castsi128_ps)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_comieq_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_comige_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_comigt_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_comile_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_comilt_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_comineq_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomieq_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomige_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomigt_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomile_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomilt_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomineq_sd)

SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_copysign_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_xorsign_pd)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepi32_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepi32_ps)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpd_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpd_pi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpd_ps)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpi32_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtps_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtps_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsd_f64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsd_si32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsd_si64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsd_ss)
SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_cvtsi128_si16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi128_si32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi128_si64)
SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_cvtsi16_si128)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi32_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi32_si128)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi64_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi64_si128)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtss_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttpd_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttpd_pi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttps_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttsd_si32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttsd_si64)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_sd)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_extract_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_insert_epi16)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_pd1)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_si128)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadh_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadl_epi64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadl_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadr_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_si128)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_si16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_si32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_si64)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_movemask_epi8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_movemask_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskmoveu_si128)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_move_epi64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_move_sd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_movepi64_pi64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_movpi64_epi64)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_or_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_or_si128)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_packs_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_packs_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_packus_epi16)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_undefined_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_undefined_si128)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_epi8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_epi64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_epi8)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_epi64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_pd)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_shuffle_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_shuffle_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_shufflehi_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_shufflelo_epi16)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_sll_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sll_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sll_epi64)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srl_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srl_epi32)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_srl_epi64)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_sqrt_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sqrt_sd)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_madd_epi16)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_sad_epu8)

SIMDE_TEST_FUNC_LIST_ENTRY(mm_xor_pd)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_xor_si128)

SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_not_si128)
SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_sub_epu32)
SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_mul_epi64)
SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_mod_epi64)
SIMDE_TEST_FUNC_LIST_END
10481
10482 #include <test/x86/test-x86-footer.h>
10483