1 /* Copyright (c) 2017, 2019 Evan Nemerson <evan@nemerson.com>
2  *
3  * Permission is hereby granted, free of charge, to any person
4  * obtaining a copy of this software and associated documentation
5  * files (the "Software"), to deal in the Software without
6  * restriction, including without limitation the rights to use, copy,
7  * modify, merge, publish, distribute, sublicense, and/or sell copies
8  * of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be
12  * included in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "test/test.h"
25 #define SIMDE_TESTS_CURRENT_ISAX sse2
26 #include <simde/x86/sse2.h>
27 #include <test/x86/test-sse2.h>
28 
29 #if defined(HEDLEY_MSVC_VERSION)
30 #  pragma warning(disable:4324)
31 #endif
32 
33 static int
test_simde_x_mm_abs_pd(SIMDE_MUNIT_TEST_ARGS)34 test_simde_x_mm_abs_pd (SIMDE_MUNIT_TEST_ARGS) {
35   static const struct {
36     const simde_float64 a[2];
37     const simde_float64 r[2];
38   } test_vec[] = {
39     { { SIMDE_FLOAT64_C(   147.28), SIMDE_FLOAT64_C(   704.65) },
40       { SIMDE_FLOAT64_C(   147.28), SIMDE_FLOAT64_C(   704.65) } },
41     { { SIMDE_FLOAT64_C(   136.85), SIMDE_FLOAT64_C(  -756.74) },
42       { SIMDE_FLOAT64_C(   136.85), SIMDE_FLOAT64_C(   756.74) } },
43     { { SIMDE_FLOAT64_C(   178.63), SIMDE_FLOAT64_C(  -900.20) },
44       { SIMDE_FLOAT64_C(   178.63), SIMDE_FLOAT64_C(   900.20) } },
45     { { SIMDE_FLOAT64_C(  -651.54), SIMDE_FLOAT64_C(  -517.72) },
46       { SIMDE_FLOAT64_C(   651.54), SIMDE_FLOAT64_C(   517.72) } },
47     { { SIMDE_FLOAT64_C(    75.39), SIMDE_FLOAT64_C(  -705.91) },
48       { SIMDE_FLOAT64_C(    75.39), SIMDE_FLOAT64_C(   705.91) } },
49     { { SIMDE_FLOAT64_C(  -738.47), SIMDE_FLOAT64_C(  -668.92) },
50       { SIMDE_FLOAT64_C(   738.47), SIMDE_FLOAT64_C(   668.92) } },
51     { { SIMDE_FLOAT64_C(   212.72), SIMDE_FLOAT64_C(  -499.79) },
52       { SIMDE_FLOAT64_C(   212.72), SIMDE_FLOAT64_C(   499.79) } },
53     { { SIMDE_FLOAT64_C(   481.67), SIMDE_FLOAT64_C(   233.48) },
54       { SIMDE_FLOAT64_C(   481.67), SIMDE_FLOAT64_C(   233.48) } }
55   };
56 
57   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
58     simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
59     simde__m128d r = simde_x_mm_abs_pd(a);
60     simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1);
61   }
62 
63   return 0;
64 }
65 
66 static int
test_simde_mm_add_epi8(SIMDE_MUNIT_TEST_ARGS)67 test_simde_mm_add_epi8 (SIMDE_MUNIT_TEST_ARGS) {
68   struct {
69     int8_t a[16];
70     int8_t b[16];
71     int8_t r[16];
72   } test_vec[] = {
73     { {  INT8_C(  90),  INT8_C( 118), -INT8_C(  35), -INT8_C(  66),  INT8_C(  97),  INT8_C(  96),  INT8_C(  20), -INT8_C( 105),
74          INT8_C(  78), -INT8_C(  32),  INT8_C( 110), -INT8_C(  33), -INT8_C(  12),  INT8_C(   9),  INT8_C( 119), -INT8_C(  73) },
75       { -INT8_C( 117), -INT8_C( 121),  INT8_C( 108), -INT8_C( 124), -INT8_C(  90),  INT8_C( 100), -INT8_C( 121), -INT8_C( 115),
76          INT8_C(  60),  INT8_C( 124),  INT8_C(  32), -INT8_C(  86),  INT8_C( 118), -INT8_C(  20), -INT8_C( 123), -INT8_C(  48) },
77       { -INT8_C(  27), -INT8_C(   3),  INT8_C(  73),  INT8_C(  66),  INT8_C(   7), -INT8_C(  60), -INT8_C( 101),  INT8_C(  36),
78         -INT8_C( 118),  INT8_C(  92), -INT8_C( 114), -INT8_C( 119),  INT8_C( 106), -INT8_C(  11), -INT8_C(   4), -INT8_C( 121) } },
79     { {  INT8_C(  99),  INT8_C(  98), -INT8_C( 113), -INT8_C(  60), -INT8_C(  62), -INT8_C(  93),  INT8_C(  92),  INT8_C(  16),
80         -INT8_C( 125), -INT8_C(  54), -INT8_C(  16),  INT8_C( 119), -INT8_C(  45),  INT8_C( 103),  INT8_C(  47),  INT8_C(  95) },
81       { -INT8_C(  18), -INT8_C( 101), -INT8_C(  29), -INT8_C( 108), -INT8_C(   1),  INT8_C( 106),  INT8_C(  34),  INT8_C(  59),
82         -INT8_C(  26),  INT8_C(  66), -INT8_C(  27),  INT8_C(  92),  INT8_C(  47),  INT8_C( 107),  INT8_C(  44), -INT8_C( 110) },
83       {  INT8_C(  81), -INT8_C(   3),  INT8_C( 114),  INT8_C(  88), -INT8_C(  63),  INT8_C(  13),  INT8_C( 126),  INT8_C(  75),
84          INT8_C( 105),  INT8_C(  12), -INT8_C(  43), -INT8_C(  45),  INT8_C(   2), -INT8_C(  46),  INT8_C(  91), -INT8_C(  15) } },
85     { { -INT8_C(  51), -INT8_C(  69),  INT8_C(  86), -INT8_C( 112),  INT8_C(  94), -INT8_C(  78), -INT8_C(  96), -INT8_C(  31),
86          INT8_C( 125), -INT8_C( 112),  INT8_C(  89),  INT8_C(  80), -INT8_C(   9), -INT8_C( 120), -INT8_C(  81), -INT8_C(  27) },
87       {  INT8_C(  35), -INT8_C( 110),  INT8_C( 122),  INT8_C(  34), -INT8_C(   4), -INT8_C( 100),  INT8_C(  94), -INT8_C(  30),
88         -INT8_C(  34),  INT8_C(  67),  INT8_C(  62),  INT8_C(  13), -INT8_C(  82),  INT8_C( 107), -INT8_C(  97),  INT8_C( 124) },
89       { -INT8_C(  16),  INT8_C(  77), -INT8_C(  48), -INT8_C(  78),  INT8_C(  90),  INT8_C(  78), -INT8_C(   2), -INT8_C(  61),
90          INT8_C(  91), -INT8_C(  45), -INT8_C( 105),  INT8_C(  93), -INT8_C(  91), -INT8_C(  13),  INT8_C(  78),  INT8_C(  97) } },
91     { {  INT8_C(  38), -INT8_C(  10),  INT8_C(  12), -INT8_C( 123), -INT8_C(  88), -INT8_C(  84),  INT8_C( 102),  INT8_C(  37),
92          INT8_C(  61), -INT8_C(  65),  INT8_C( 118),  INT8_C(  52),  INT8_C(  71),  INT8_C(  37),  INT8_C(  26),  INT8_C( 106) },
93       { -INT8_C(  72), -INT8_C( 108), -INT8_C( 115), -INT8_C(  76),  INT8_C(  48), -INT8_C(  21), -INT8_C( 105),  INT8_C(  14),
94          INT8_C(  46), -INT8_C(  43),  INT8_C(  28), -INT8_C(  35),  INT8_C(  64), -INT8_C(  69),  INT8_C(  89),  INT8_C( 103) },
95       { -INT8_C(  34), -INT8_C( 118), -INT8_C( 103),  INT8_C(  57), -INT8_C(  40), -INT8_C( 105), -INT8_C(   3),  INT8_C(  51),
96          INT8_C( 107), -INT8_C( 108), -INT8_C( 110),  INT8_C(  17), -INT8_C( 121), -INT8_C(  32),  INT8_C( 115), -INT8_C(  47) } },
97     { { -INT8_C(  79),  INT8_C( 101), -INT8_C(  20),  INT8_C(  90),  INT8_C(  17),  INT8_C(  82),      INT8_MAX,  INT8_C(  78),
98          INT8_C(  18), -INT8_C(  11), -INT8_C( 125),  INT8_C(  89),  INT8_C(  27), -INT8_C(  99), -INT8_C(  60), -INT8_C(  45) },
99       {  INT8_C(  49),  INT8_C(  81), -INT8_C( 121),  INT8_C(  97),  INT8_C(  60),  INT8_C(  30),  INT8_C( 111),  INT8_C( 106),
100         -INT8_C(  12), -INT8_C( 117),  INT8_C(  71),  INT8_C(  52),  INT8_C(  71), -INT8_C(  96), -INT8_C( 101), -INT8_C(   8) },
101       { -INT8_C(  30), -INT8_C(  74),  INT8_C( 115), -INT8_C(  69),  INT8_C(  77),  INT8_C( 112), -INT8_C(  18), -INT8_C(  72),
102          INT8_C(   6),      INT8_MIN, -INT8_C(  54), -INT8_C( 115),  INT8_C(  98),  INT8_C(  61),  INT8_C(  95), -INT8_C(  53) } },
103     { {  INT8_C(   5), -INT8_C( 121),  INT8_C(  82),  INT8_C(  23), -INT8_C(  38), -INT8_C(  46),  INT8_C( 101), -INT8_C(  20),
104         -INT8_C(  57), -INT8_C(  24),  INT8_C(  69), -INT8_C(  30), -INT8_C( 123),  INT8_C(   9), -INT8_C(  75), -INT8_C(  74) },
105       {  INT8_C(  90),  INT8_C(  61),  INT8_C(  23), -INT8_C( 106),  INT8_C(  91), -INT8_C( 121),  INT8_C(   1),  INT8_C(  79),
106          INT8_C(  18),  INT8_C(  72), -INT8_C( 124),  INT8_C(  89), -INT8_C(  23),  INT8_C(  31),  INT8_C(  82), -INT8_C(  18) },
107       {  INT8_C(  95), -INT8_C(  60),  INT8_C( 105), -INT8_C(  83),  INT8_C(  53),  INT8_C(  89),  INT8_C( 102),  INT8_C(  59),
108         -INT8_C(  39),  INT8_C(  48), -INT8_C(  55),  INT8_C(  59),  INT8_C( 110),  INT8_C(  40),  INT8_C(   7), -INT8_C(  92) } },
109     { { -INT8_C(  89), -INT8_C(  92),  INT8_C(   5), -INT8_C( 127),  INT8_C( 118),  INT8_C( 107),  INT8_C( 109),  INT8_C(  62),
110          INT8_C(  83), -INT8_C(  78),  INT8_C(  32), -INT8_C(  39), -INT8_C(  68), -INT8_C(  42), -INT8_C( 113),  INT8_C(  22) },
111       {  INT8_C(  19), -INT8_C(  89), -INT8_C(  83),  INT8_C( 110),  INT8_C(  46), -INT8_C(  82), -INT8_C(  66),  INT8_C(  64),
112         -INT8_C(  10),  INT8_C(  66), -INT8_C( 102), -INT8_C(  33),  INT8_C(  97), -INT8_C(  20), -INT8_C(  50),  INT8_C(   8) },
113       { -INT8_C(  70),  INT8_C(  75), -INT8_C(  78), -INT8_C(  17), -INT8_C(  92),  INT8_C(  25),  INT8_C(  43),  INT8_C( 126),
114          INT8_C(  73), -INT8_C(  12), -INT8_C(  70), -INT8_C(  72),  INT8_C(  29), -INT8_C(  62),  INT8_C(  93),  INT8_C(  30) } },
115     { { -INT8_C( 112), -INT8_C(  45), -INT8_C( 119),  INT8_C(   7),  INT8_C(  62), -INT8_C(  10),  INT8_C(  69), -INT8_C( 110),
116         -INT8_C(  87),  INT8_C( 101),  INT8_C( 107),  INT8_C( 101),  INT8_C(  59), -INT8_C(   6),  INT8_C( 123),  INT8_C(  78) },
117       { -INT8_C(  95),  INT8_C(  40), -INT8_C(  67), -INT8_C(  49), -INT8_C(  42),  INT8_C( 123),  INT8_C(  16), -INT8_C(  51),
118         -INT8_C(  67), -INT8_C(  86), -INT8_C(  84),  INT8_C(  30), -INT8_C( 106),  INT8_C( 122),  INT8_C(  39),  INT8_C(  38) },
119       {  INT8_C(  49), -INT8_C(   5),  INT8_C(  70), -INT8_C(  42),  INT8_C(  20),  INT8_C( 113),  INT8_C(  85),  INT8_C(  95),
120          INT8_C( 102),  INT8_C(  15),  INT8_C(  23), -INT8_C( 125), -INT8_C(  47),  INT8_C( 116), -INT8_C(  94),  INT8_C( 116) } }
121   };
122 
123   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
124     simde__m128i a = simde_x_mm_loadu_epi8(test_vec[i].a);
125     simde__m128i b = simde_x_mm_loadu_epi8(test_vec[i].b);
126     simde__m128i r = simde_mm_add_epi8(a, b);
127     simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r));
128   }
129 
130   return 0;
131 }
132 
133 static int
test_simde_mm_add_epi16(SIMDE_MUNIT_TEST_ARGS)134 test_simde_mm_add_epi16 (SIMDE_MUNIT_TEST_ARGS) {
135   struct {
136     int16_t a[8];
137     int16_t b[8];
138     int16_t r[8];
139   } test_vec[] = {
140     { { -INT16_C(  4111), -INT16_C( 19283),  INT16_C( 32346),  INT16_C( 31529),  INT16_C( 28909), -INT16_C( 11812),  INT16_C( 20575),  INT16_C( 29075) },
141       { -INT16_C( 31999),  INT16_C( 11862),  INT16_C(  4324), -INT16_C( 23595),  INT16_C( 24767), -INT16_C( 10354), -INT16_C( 11824), -INT16_C( 16113) },
142       {  INT16_C( 29426), -INT16_C(  7421), -INT16_C( 28866),  INT16_C(  7934), -INT16_C( 11860), -INT16_C( 22166),  INT16_C(  8751),  INT16_C( 12962) } },
143     { { -INT16_C( 17215),  INT16_C(  7029), -INT16_C( 24774),  INT16_C( 10134),  INT16_C( 29199),  INT16_C( 28409), -INT16_C( 29502), -INT16_C( 15137) },
144       {  INT16_C( 13584), -INT16_C(  2830), -INT16_C( 14522),  INT16_C(  1431),  INT16_C(  9512), -INT16_C(  1828), -INT16_C(  5129), -INT16_C( 18247) },
145       { -INT16_C(  3631),  INT16_C(  4199),  INT16_C( 26240),  INT16_C( 11565), -INT16_C( 26825),  INT16_C( 26581),  INT16_C( 30905),  INT16_C( 32152) } },
146     { {  INT16_C( 11944), -INT16_C(  7469),  INT16_C( 27085), -INT16_C(  9206),  INT16_C(   987), -INT16_C( 25013),  INT16_C( 10895), -INT16_C( 24734) },
147       {  INT16_C( 21600), -INT16_C( 22892),  INT16_C( 11036),  INT16_C( 17579), -INT16_C( 30895),  INT16_C( 18492), -INT16_C(  2701),  INT16_C(  6912) },
148       { -INT16_C( 31992), -INT16_C( 30361), -INT16_C( 27415),  INT16_C(  8373), -INT16_C( 29908), -INT16_C(  6521),  INT16_C(  8194), -INT16_C( 17822) } },
149     { { -INT16_C( 11485), -INT16_C(  3587),  INT16_C(  1852),  INT16_C(  6093),  INT16_C(  6154), -INT16_C( 25931),  INT16_C(  5955), -INT16_C( 23751) },
150       { -INT16_C( 12948), -INT16_C( 30647), -INT16_C(  2823),  INT16_C( 19148),  INT16_C(  2171), -INT16_C(  4462), -INT16_C( 27907),  INT16_C(  8201) },
151       { -INT16_C( 24433),  INT16_C( 31302), -INT16_C(   971),  INT16_C( 25241),  INT16_C(  8325), -INT16_C( 30393), -INT16_C( 21952), -INT16_C( 15550) } },
152     { {  INT16_C(  1893), -INT16_C( 24303), -INT16_C(  8434),  INT16_C(  6584),  INT16_C( 28407),  INT16_C( 15027), -INT16_C(  4987), -INT16_C(  3619) },
153       {  INT16_C(  9914), -INT16_C( 19591),  INT16_C( 17690), -INT16_C( 26883), -INT16_C( 28851),  INT16_C( 19076), -INT16_C( 29151), -INT16_C( 31125) },
154       {  INT16_C( 11807),  INT16_C( 21642),  INT16_C(  9256), -INT16_C( 20299), -INT16_C(   444), -INT16_C( 31433),  INT16_C( 31398),  INT16_C( 30792) } },
155     { {  INT16_C( 31893), -INT16_C( 23769), -INT16_C(  8357),  INT16_C( 21436),  INT16_C( 28493), -INT16_C( 11379),  INT16_C( 27484),  INT16_C(  5828) },
156       {  INT16_C( 16017), -INT16_C( 21303), -INT16_C( 14717), -INT16_C( 11966), -INT16_C( 14763),  INT16_C( 30235), -INT16_C( 31148), -INT16_C(  5636) },
157       { -INT16_C( 17626),  INT16_C( 20464), -INT16_C( 23074),  INT16_C(  9470),  INT16_C( 13730),  INT16_C( 18856), -INT16_C(  3664),  INT16_C(   192) } },
158     { {  INT16_C(  8963),  INT16_C( 24205),  INT16_C( 18690),  INT16_C( 20657),  INT16_C( 16313),  INT16_C(  5411), -INT16_C(  6230),  INT16_C( 15147) },
159       { -INT16_C(  3035), -INT16_C( 22041),  INT16_C( 10682),  INT16_C(  3962), -INT16_C( 27152),  INT16_C( 17541), -INT16_C( 32484),  INT16_C(  7982) },
160       {  INT16_C(  5928),  INT16_C(  2164),  INT16_C( 29372),  INT16_C( 24619), -INT16_C( 10839),  INT16_C( 22952),  INT16_C( 26822),  INT16_C( 23129) } },
161     { { -INT16_C( 17500), -INT16_C( 22915),  INT16_C( 12036), -INT16_C( 16906),  INT16_C(  6510),  INT16_C(  6354), -INT16_C(   767),  INT16_C(  9811) },
162       {  INT16_C( 15345), -INT16_C( 21553),  INT16_C( 18788),  INT16_C( 21690),  INT16_C( 16351), -INT16_C(  1127), -INT16_C( 14400),  INT16_C( 25626) },
163       { -INT16_C(  2155),  INT16_C( 21068),  INT16_C( 30824),  INT16_C(  4784),  INT16_C( 22861),  INT16_C(  5227), -INT16_C( 15167), -INT16_C( 30099) } }
164   };
165 
166   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
167     simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a);
168     simde__m128i b = simde_x_mm_loadu_epi16(test_vec[i].b);
169     simde__m128i r = simde_mm_add_epi16(a, b);
170     simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r));
171   }
172 
173   return 0;
174 }
175 
176 static int
test_simde_mm_add_epi32(SIMDE_MUNIT_TEST_ARGS)177 test_simde_mm_add_epi32 (SIMDE_MUNIT_TEST_ARGS) {
178   struct {
179     int32_t a[4];
180     int32_t b[4];
181     int32_t r[4];
182   } test_vec[] = {
183     { {  INT32_C(  1587156417),  INT32_C(  1768270179), -INT32_C(  1942404587),  INT32_C(   346970517) },
184       {  INT32_C(  2141391970),  INT32_C(  1584534422),  INT32_C(  1144809083), -INT32_C(   446909148) },
185       { -INT32_C(   566418909), -INT32_C(   942162695), -INT32_C(   797595504), -INT32_C(    99938631) } },
186     { {  INT32_C(   776206027), -INT32_C(  1265129313),  INT32_C(  2134954218), -INT32_C(  1953239511) },
187       { -INT32_C(  1861535750), -INT32_C(   974160566),  INT32_C(   134884324), -INT32_C(  1393727775) },
188       { -INT32_C(  1085329723),  INT32_C(  2055677417), -INT32_C(  2025128754),  INT32_C(   948000010) } },
189     { { -INT32_C(    69586852), -INT32_C(  1011912232),  INT32_C(  1782771777), -INT32_C(   638134562) },
190       {  INT32_C(  1466564877),  INT32_C(  1646090622),  INT32_C(  1718232965), -INT32_C(   384673907) },
191       {  INT32_C(  1396978025),  INT32_C(   634178390), -INT32_C(   793962554), -INT32_C(  1022808469) } },
192     { {  INT32_C(  1625615495), -INT32_C(  1641835683),  INT32_C(  1644717443),  INT32_C(  1211891259) },
193       {  INT32_C(  2124457471), -INT32_C(  2082423298),  INT32_C(  1911114724),  INT32_C(   710605730) },
194       { -INT32_C(   544894330),  INT32_C(   570708315), -INT32_C(   739135129),  INT32_C(  1922496989) } },
195     { {  INT32_C(  1149910759),  INT32_C(  1440918993),  INT32_C(  1320676114), -INT32_C(   375983383) },
196       { -INT32_C(  1788397929), -INT32_C(   686209037),  INT32_C(   893911698), -INT32_C(   446717186) },
197       { -INT32_C(   638487170),  INT32_C(   754709956), -INT32_C(  2080379484), -INT32_C(   822700569) } },
198     { { -INT32_C(  1305810464), -INT32_C(  1475933034), -INT32_C(   503922953),  INT32_C(  1204456880) },
199       { -INT32_C(  1210306109),  INT32_C(   193918328), -INT32_C(   163522568),  INT32_C(  1524342649) },
200       {  INT32_C(  1778850723), -INT32_C(  1282014706), -INT32_C(   667445521), -INT32_C(  1566167767) } },
201     { {  INT32_C(   504104328),  INT32_C(   163975954), -INT32_C(  2115322415),  INT32_C(   231257162) },
202       {  INT32_C(  1589945573), -INT32_C(  1838591078), -INT32_C(  1551324886), -INT32_C(   788700344) },
203       {  INT32_C(  2094049901), -INT32_C(  1674615124),  INT32_C(   628319995), -INT32_C(   557443182) } },
204     { {  INT32_C(  2079197545), -INT32_C(   310070244), -INT32_C(  1150390415),  INT32_C(   164181539) },
205       {  INT32_C(  1969720795),  INT32_C(   168284384), -INT32_C(  1045524615),  INT32_C(  1536273394) },
206       { -INT32_C(   246048956), -INT32_C(   141785860),  INT32_C(  2099052266),  INT32_C(  1700454933) } }
207   };
208 
209   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
210     simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a);
211     simde__m128i b = simde_x_mm_loadu_epi32(test_vec[i].b);
212     simde__m128i r = simde_mm_add_epi32(a, b);
213     simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
214   }
215 
216   return 0;
217 }
218 
219 static int
test_simde_mm_add_epi64(SIMDE_MUNIT_TEST_ARGS)220 test_simde_mm_add_epi64 (SIMDE_MUNIT_TEST_ARGS) {
221   struct {
222     int64_t a[2];
223     int64_t b[2];
224     int64_t r[2];
225   } test_vec[] = {
226     { { -INT64_C( 6468439616558299793),  INT64_C( 2325632228821341991) },
227       { -INT64_C(  612652056685655455), -INT64_C(  191691543793121214) },
228       { -INT64_C( 7081091673243955248),  INT64_C( 2133940685028220777) } },
229     { { -INT64_C(  894566178211475330),  INT64_C( 6756798005412736627) },
230       { -INT64_C( 3896691714656888127),  INT64_C( 2845879868330258419) },
231       { -INT64_C( 4791257892868363457), -INT64_C( 8844066199966556570) } },
232     { {  INT64_C( 7901755739001462504),  INT64_C( 1347655258826955098) },
233       {  INT64_C( 8953142355952099055),  INT64_C(  248677757309780642) },
234       { -INT64_C( 1591845978755990057),  INT64_C( 1596333016136735740) } },
235     { { -INT64_C( 8141839393087780454), -INT64_C( 2946030458831039558) },
236       { -INT64_C( 4972663281470790409),  INT64_C( 1165720327465335311) },
237       {  INT64_C( 5332241399150980753), -INT64_C( 1780310131365704247) } },
238     { { -INT64_C(   15861257455999742),  INT64_C( 4357558393977351353) },
239       {  INT64_C( 7214407425212598092), -INT64_C( 7045112387664469068) },
240       {  INT64_C( 7198546167756598350), -INT64_C( 2687553993687117715) } },
241     { {  INT64_C( 4532200698918854304),  INT64_C( 7262715306804571977) },
242       { -INT64_C(  803639368974039520), -INT64_C( 4520672699422448119) },
243       {  INT64_C( 3728561329944814784),  INT64_C( 2742042607382123858) } },
244     { { -INT64_C(   73591731732932298),  INT64_C( 6050399403914353275) },
245       { -INT64_C( 5903761005476331555), -INT64_C( 4762108524214604026) },
246       { -INT64_C( 5977352737209263853),  INT64_C( 1288290879699749249) } },
247     { { -INT64_C( 7465715716457918288),  INT64_C( 2653502295939739981) },
248       {  INT64_C( 4698470722568297185), -INT64_C( 3402942170898265983) },
249       { -INT64_C( 2767244993889621103), -INT64_C(  749439874958526002) } }
250   };
251 
252   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
253     simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a);
254     simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b);
255     simde__m128i r = simde_mm_add_epi64(a, b);
256     simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r));
257   }
258 
259   return 0;
260 }
261 
262 static int
test_simde_mm_add_pd(SIMDE_MUNIT_TEST_ARGS)263 test_simde_mm_add_pd (SIMDE_MUNIT_TEST_ARGS) {
264   struct {
265     simde_float64 a[2];
266     simde_float64 b[2];
267     simde_float64 r[2];
268   } test_vec[] = {
269     { { SIMDE_FLOAT64_C(   755.33), SIMDE_FLOAT64_C(   721.25) },
270       { SIMDE_FLOAT64_C(   781.60), SIMDE_FLOAT64_C(  -779.68) },
271       { SIMDE_FLOAT64_C(  1536.93), SIMDE_FLOAT64_C(   -58.44) } },
272     { { SIMDE_FLOAT64_C(  -566.45), SIMDE_FLOAT64_C(  -614.54) },
273       { SIMDE_FLOAT64_C(   194.36), SIMDE_FLOAT64_C(  -334.34) },
274       { SIMDE_FLOAT64_C(  -372.09), SIMDE_FLOAT64_C(  -948.88) } },
275     { { SIMDE_FLOAT64_C(   813.61), SIMDE_FLOAT64_C(  -315.29) },
276       { SIMDE_FLOAT64_C(   361.18), SIMDE_FLOAT64_C(   614.31) },
277       { SIMDE_FLOAT64_C(  1174.78), SIMDE_FLOAT64_C(   299.02) } },
278     { { SIMDE_FLOAT64_C(   824.96), SIMDE_FLOAT64_C(  -193.54) },
279       { SIMDE_FLOAT64_C(   701.59), SIMDE_FLOAT64_C(  -521.55) },
280       { SIMDE_FLOAT64_C(  1526.55), SIMDE_FLOAT64_C(  -715.09) } },
281     { { SIMDE_FLOAT64_C(  -703.59), SIMDE_FLOAT64_C(   322.49) },
282       { SIMDE_FLOAT64_C(   -26.00), SIMDE_FLOAT64_C(   910.61) },
283       { SIMDE_FLOAT64_C(  -729.59), SIMDE_FLOAT64_C(  1233.10) } },
284     { { SIMDE_FLOAT64_C(  -720.23), SIMDE_FLOAT64_C(   197.82) },
285       { SIMDE_FLOAT64_C(  -770.39), SIMDE_FLOAT64_C(  -888.99) },
286       { SIMDE_FLOAT64_C( -1490.62), SIMDE_FLOAT64_C(  -691.16) } },
287     { { SIMDE_FLOAT64_C(   238.41), SIMDE_FLOAT64_C(  -248.68) },
288       { SIMDE_FLOAT64_C(  -805.44), SIMDE_FLOAT64_C(   805.25) },
289       { SIMDE_FLOAT64_C(  -567.03), SIMDE_FLOAT64_C(   556.57) } },
290     { { SIMDE_FLOAT64_C(    13.85), SIMDE_FLOAT64_C(  -859.57) },
291       { SIMDE_FLOAT64_C(   840.09), SIMDE_FLOAT64_C(  -230.82) },
292       { SIMDE_FLOAT64_C(   853.93), SIMDE_FLOAT64_C( -1090.39) } }
293   };
294 
295   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
296     simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
297     simde__m128d b = simde_mm_loadu_pd(test_vec[i].b);
298     simde__m128d r = simde_mm_add_pd(a, b);
299     simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1);
300   }
301 
302   return 0;
303 }
304 
305 static int
test_simde_mm_add_sd(SIMDE_MUNIT_TEST_ARGS)306 test_simde_mm_add_sd(SIMDE_MUNIT_TEST_ARGS) {
307   const struct {
308     simde__m128d a;
309     simde__m128d b;
310     simde__m128d r;
311   } test_vec[8] = {
312     { simde_mm_set_pd(SIMDE_FLOAT64_C( -348.09), SIMDE_FLOAT64_C( -603.87)),
313       simde_mm_set_pd(SIMDE_FLOAT64_C(   42.81), SIMDE_FLOAT64_C( -955.64)),
314       simde_mm_set_pd(SIMDE_FLOAT64_C( -348.09), SIMDE_FLOAT64_C(-1559.51)) },
315     { simde_mm_set_pd(SIMDE_FLOAT64_C(  600.30), SIMDE_FLOAT64_C(  362.82)),
316       simde_mm_set_pd(SIMDE_FLOAT64_C( -245.13), SIMDE_FLOAT64_C( -144.52)),
317       simde_mm_set_pd(SIMDE_FLOAT64_C(  600.30), SIMDE_FLOAT64_C(  218.30)) },
318     { simde_mm_set_pd(SIMDE_FLOAT64_C( -909.82), SIMDE_FLOAT64_C(  -28.51)),
319       simde_mm_set_pd(SIMDE_FLOAT64_C( -141.49), SIMDE_FLOAT64_C(  174.41)),
320       simde_mm_set_pd(SIMDE_FLOAT64_C( -909.82), SIMDE_FLOAT64_C(  145.90)) },
321     { simde_mm_set_pd(SIMDE_FLOAT64_C( -402.79), SIMDE_FLOAT64_C( -225.69)),
322       simde_mm_set_pd(SIMDE_FLOAT64_C( -114.28), SIMDE_FLOAT64_C(  118.74)),
323       simde_mm_set_pd(SIMDE_FLOAT64_C( -402.79), SIMDE_FLOAT64_C( -106.95)) },
324     { simde_mm_set_pd(SIMDE_FLOAT64_C(  476.58), SIMDE_FLOAT64_C(  189.13)),
325       simde_mm_set_pd(SIMDE_FLOAT64_C(  158.24), SIMDE_FLOAT64_C(  133.22)),
326       simde_mm_set_pd(SIMDE_FLOAT64_C(  476.58), SIMDE_FLOAT64_C(  322.35)) },
327     { simde_mm_set_pd(SIMDE_FLOAT64_C( -902.16), SIMDE_FLOAT64_C( -720.35)),
328       simde_mm_set_pd(SIMDE_FLOAT64_C( -496.01), SIMDE_FLOAT64_C(  563.52)),
329       simde_mm_set_pd(SIMDE_FLOAT64_C( -902.16), SIMDE_FLOAT64_C( -156.83)) },
330     { simde_mm_set_pd(SIMDE_FLOAT64_C(   32.48), SIMDE_FLOAT64_C( -172.74)),
331       simde_mm_set_pd(SIMDE_FLOAT64_C(  435.61), SIMDE_FLOAT64_C(  209.72)),
332       simde_mm_set_pd(SIMDE_FLOAT64_C(   32.48), SIMDE_FLOAT64_C(   36.98)) },
333     { simde_mm_set_pd(SIMDE_FLOAT64_C(  322.78), SIMDE_FLOAT64_C( -415.13)),
334       simde_mm_set_pd(SIMDE_FLOAT64_C(  -49.82), SIMDE_FLOAT64_C( -195.58)),
335       simde_mm_set_pd(SIMDE_FLOAT64_C(  322.78), SIMDE_FLOAT64_C( -610.71)) }
336   };
337 
338   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
339     simde__m128d r = simde_mm_add_sd(test_vec[i].a, test_vec[i].b);
340     simde_assert_m128d_close(r, test_vec[i].r, 1);
341   }
342 
343   return 0;
344 }
345 
346 static int
test_simde_mm_add_si64(SIMDE_MUNIT_TEST_ARGS)347 test_simde_mm_add_si64(SIMDE_MUNIT_TEST_ARGS) {
348   const struct {
349     simde__m64 a;
350     simde__m64 b;
351     simde__m64 r;
352   } test_vec[8] = {
353     { simde_mm_cvtsi64_m64(INT64_C(  793111073070173174)),
354       simde_mm_cvtsi64_m64(INT64_C( 2108668061446341817)),
355       simde_mm_cvtsi64_m64(INT64_C( 2901779134516514991)), },
356     { simde_mm_cvtsi64_m64(INT64_C( 8875506276833571865)),
357       simde_mm_cvtsi64_m64(INT64_C(-8625831155966083456)),
358       simde_mm_cvtsi64_m64(INT64_C(  249675120867488409)), },
359     { simde_mm_cvtsi64_m64(INT64_C( 2916092148231541839)),
360       simde_mm_cvtsi64_m64(INT64_C( 7640479428881902755)),
361       simde_mm_cvtsi64_m64(INT64_C(-7890172496596107022)), },
362     { simde_mm_cvtsi64_m64(INT64_C(-3448012693901819300)),
363       simde_mm_cvtsi64_m64(INT64_C(-9198379985559078668)),
364       simde_mm_cvtsi64_m64(INT64_C( 5800351394248653648)), },
365     { simde_mm_cvtsi64_m64(INT64_C( 3628113225825158935)),
366       simde_mm_cvtsi64_m64(INT64_C(-1333669735654572042)),
367       simde_mm_cvtsi64_m64(INT64_C( 2294443490170586893)), },
368     { simde_mm_cvtsi64_m64(INT64_C( 5048798289215441413)),
369       simde_mm_cvtsi64_m64(INT64_C( -388036903570542302)),
370       simde_mm_cvtsi64_m64(INT64_C( 4660761385644899111)), },
371     { simde_mm_cvtsi64_m64(INT64_C( 6446512717337269554)),
372       simde_mm_cvtsi64_m64(INT64_C(-7669829270527021775)),
373       simde_mm_cvtsi64_m64(INT64_C(-1223316553189752221)), },
374     { simde_mm_cvtsi64_m64(INT64_C( 6296531259101832881)),
375       simde_mm_cvtsi64_m64(INT64_C( 5834912758815977701)),
376       simde_mm_cvtsi64_m64(INT64_C(-6315300055791741034)), }
377   };
378 
379   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
380     simde__m64 r = simde_mm_add_si64(test_vec[i].a, test_vec[i].b);
381     simde_assert_m64_i64(r, ==, test_vec[i].r);
382   }
383 
384   return 0;
385 }
386 
387 static int
test_simde_mm_adds_epi8(SIMDE_MUNIT_TEST_ARGS)388 test_simde_mm_adds_epi8(SIMDE_MUNIT_TEST_ARGS) {
389   const struct {
390     simde__m128i a;
391     simde__m128i b;
392     simde__m128i r;
393   } test_vec[8] = {
394     { simde_mm_set_epi8(INT8_C( 100), INT8_C(  33), INT8_C(  67), INT8_C(  67), INT8_C( 104), INT8_C(-123), INT8_C( -86), INT8_C(  74),
395                         INT8_C( -93), INT8_C(  -4), INT8_C( -12), INT8_C(  28), INT8_C(   9), INT8_C(  39), INT8_C(  83), INT8_C( -52)),
396       simde_mm_set_epi8(INT8_C(  40), INT8_C( -64), INT8_C( -19), INT8_C( -17), INT8_C(  67), INT8_C( -93), INT8_C( -22), INT8_C(  98),
397                         INT8_C( -73), INT8_C( -83), INT8_C( 107), INT8_C(  95), INT8_C(  59), INT8_C(  84), INT8_C( -72), INT8_C(-115)),
398       simde_mm_set_epi8(INT8_C( 127), INT8_C( -31), INT8_C(  48), INT8_C(  50), INT8_C( 127), INT8_C(-128), INT8_C(-108), INT8_C( 127),
399                         INT8_C(-128), INT8_C( -87), INT8_C(  95), INT8_C( 123), INT8_C(  68), INT8_C( 123), INT8_C(  11), INT8_C(-128)) },
400     { simde_mm_set_epi8(INT8_C(  76), INT8_C( 121), INT8_C(  98), INT8_C(  52), INT8_C(  50), INT8_C( -16), INT8_C(  53), INT8_C(   3),
401                         INT8_C( -57), INT8_C( -76), INT8_C( -42), INT8_C(  70), INT8_C(-122), INT8_C(  71), INT8_C( -56), INT8_C( -15)),
402       simde_mm_set_epi8(INT8_C( 100), INT8_C( 124), INT8_C(  99), INT8_C(  11), INT8_C(  -8), INT8_C(   5), INT8_C(   6), INT8_C( -54),
403                         INT8_C(  42), INT8_C( -99), INT8_C(  23), INT8_C(-128), INT8_C(  77), INT8_C(  14), INT8_C(  94), INT8_C(  53)),
404       simde_mm_set_epi8(INT8_C( 127), INT8_C( 127), INT8_C( 127), INT8_C(  63), INT8_C(  42), INT8_C( -11), INT8_C(  59), INT8_C( -51),
405                         INT8_C( -15), INT8_C(-128), INT8_C( -19), INT8_C( -58), INT8_C( -45), INT8_C(  85), INT8_C(  38), INT8_C(  38)) },
406     { simde_mm_set_epi8(INT8_C( -13), INT8_C(  29), INT8_C(  30), INT8_C(  13), INT8_C(  51), INT8_C(  11), INT8_C( -27), INT8_C( -12),
407                         INT8_C(  97), INT8_C(  87), INT8_C(  67), INT8_C(  70), INT8_C(   2), INT8_C( -40), INT8_C(  49), INT8_C( 116)),
408       simde_mm_set_epi8(INT8_C(  42), INT8_C( -17), INT8_C( -77), INT8_C( 126), INT8_C(-125), INT8_C( -42), INT8_C(  45), INT8_C( -79),
409                         INT8_C( -23), INT8_C( 110), INT8_C( 117), INT8_C( -44), INT8_C( -92), INT8_C( -20), INT8_C(-121), INT8_C( 102)),
410       simde_mm_set_epi8(INT8_C(  29), INT8_C(  12), INT8_C( -47), INT8_C( 127), INT8_C( -74), INT8_C( -31), INT8_C(  18), INT8_C( -91),
411                         INT8_C(  74), INT8_C( 127), INT8_C( 127), INT8_C(  26), INT8_C( -90), INT8_C( -60), INT8_C( -72), INT8_C( 127)) },
412     { simde_mm_set_epi8(INT8_C(  55), INT8_C(-106), INT8_C( -49), INT8_C( -49), INT8_C( -85), INT8_C( -58), INT8_C( -56), INT8_C( -25),
413                         INT8_C(  78), INT8_C(  18), INT8_C(  71), INT8_C( -12), INT8_C(  86), INT8_C( -84), INT8_C( -77), INT8_C(-116)),
414       simde_mm_set_epi8(INT8_C(-103), INT8_C( 107), INT8_C(  33), INT8_C( -17), INT8_C( 106), INT8_C(   4), INT8_C( -98), INT8_C(-128),
415                         INT8_C(  53), INT8_C(   4), INT8_C( 120), INT8_C( -44), INT8_C( -99), INT8_C( 120), INT8_C( -27), INT8_C(  45)),
416       simde_mm_set_epi8(INT8_C( -48), INT8_C(   1), INT8_C( -16), INT8_C( -66), INT8_C(  21), INT8_C( -54), INT8_C(-128), INT8_C(-128),
417                         INT8_C( 127), INT8_C(  22), INT8_C( 127), INT8_C( -56), INT8_C( -13), INT8_C(  36), INT8_C(-104), INT8_C( -71)) },
418     { simde_mm_set_epi8(INT8_C(  47), INT8_C(  15), INT8_C( 126), INT8_C(-115), INT8_C( -77), INT8_C( -27), INT8_C( -38), INT8_C(  32),
419                         INT8_C( -21), INT8_C( -80), INT8_C( 112), INT8_C(  75), INT8_C( -15), INT8_C( -92), INT8_C(  43), INT8_C( -22)),
420       simde_mm_set_epi8(INT8_C( -33), INT8_C( 127), INT8_C( 123), INT8_C(  65), INT8_C(  63), INT8_C(  85), INT8_C(  75), INT8_C(  99),
421                         INT8_C(  -2), INT8_C(  13), INT8_C( -46), INT8_C(  -8), INT8_C( 127), INT8_C(-115), INT8_C(-109), INT8_C(  14)),
422       simde_mm_set_epi8(INT8_C(  14), INT8_C( 127), INT8_C( 127), INT8_C( -50), INT8_C( -14), INT8_C(  58), INT8_C(  37), INT8_C( 127),
423                         INT8_C( -23), INT8_C( -67), INT8_C(  66), INT8_C(  67), INT8_C( 112), INT8_C(-128), INT8_C( -66), INT8_C(  -8)) },
424     { simde_mm_set_epi8(INT8_C(  18), INT8_C(  75), INT8_C(  10), INT8_C(  29), INT8_C(  27), INT8_C( 101), INT8_C(  -1), INT8_C(  78),
425                         INT8_C( -78), INT8_C( 110), INT8_C(  18), INT8_C(  82), INT8_C( -41), INT8_C(  85), INT8_C(-113), INT8_C( 126)),
426       simde_mm_set_epi8(INT8_C( -90), INT8_C(  80), INT8_C(-103), INT8_C(-111), INT8_C(  86), INT8_C(  65), INT8_C(  89), INT8_C(  88),
427                         INT8_C( -83), INT8_C(-121), INT8_C(  -2), INT8_C(  40), INT8_C( -96), INT8_C( -36), INT8_C(  64), INT8_C( -15)),
428       simde_mm_set_epi8(INT8_C( -72), INT8_C( 127), INT8_C( -93), INT8_C( -82), INT8_C( 113), INT8_C( 127), INT8_C(  88), INT8_C( 127),
429                         INT8_C(-128), INT8_C( -11), INT8_C(  16), INT8_C( 122), INT8_C(-128), INT8_C(  49), INT8_C( -49), INT8_C( 111)) },
430     { simde_mm_set_epi8(INT8_C( -90), INT8_C(  48), INT8_C( -43), INT8_C(  22), INT8_C(  78), INT8_C( -17), INT8_C( -78), INT8_C( -64),
431                         INT8_C( -97), INT8_C( -80), INT8_C( -51), INT8_C(  72), INT8_C( 114), INT8_C( -11), INT8_C( -89), INT8_C( -93)),
432       simde_mm_set_epi8(INT8_C(   8), INT8_C(  57), INT8_C(  66), INT8_C(-119), INT8_C(  79), INT8_C( -29), INT8_C( -49), INT8_C(  26),
433                         INT8_C( -12), INT8_C( -99), INT8_C(-101), INT8_C( 121), INT8_C(-112), INT8_C(  -5), INT8_C( -19), INT8_C( -27)),
434       simde_mm_set_epi8(INT8_C( -82), INT8_C( 105), INT8_C(  23), INT8_C( -97), INT8_C( 127), INT8_C( -46), INT8_C(-127), INT8_C( -38),
435                         INT8_C(-109), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C(   2), INT8_C( -16), INT8_C(-108), INT8_C(-120)) },
436     { simde_mm_set_epi8(INT8_C(  26), INT8_C( -15), INT8_C(  12), INT8_C( -66), INT8_C(  -7), INT8_C(-115), INT8_C( -21), INT8_C(  27),
437                         INT8_C( 111), INT8_C(-126), INT8_C( -43), INT8_C( -94), INT8_C( -97), INT8_C( -34), INT8_C( -47), INT8_C( -79)),
438       simde_mm_set_epi8(INT8_C(-124), INT8_C( -47), INT8_C(-123), INT8_C(-115), INT8_C( -15), INT8_C( -87), INT8_C(-121), INT8_C( -50),
439                         INT8_C( 103), INT8_C(  85), INT8_C(  34), INT8_C( -85), INT8_C(-124), INT8_C(  70), INT8_C(  14), INT8_C( -44)),
440       simde_mm_set_epi8(INT8_C( -98), INT8_C( -62), INT8_C(-111), INT8_C(-128), INT8_C( -22), INT8_C(-128), INT8_C(-128), INT8_C( -23),
441                         INT8_C( 127), INT8_C( -41), INT8_C(  -9), INT8_C(-128), INT8_C(-128), INT8_C(  36), INT8_C( -33), INT8_C(-123)) }
442   };
443 
444   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
445     simde__m128i r = simde_mm_adds_epi8(test_vec[i].a, test_vec[i].b);
446     simde_assert_m128i_i8(r, ==, test_vec[i].r);
447   }
448 
449   return 0;
450 }
451 
452 static int
test_simde_mm_adds_epi16(SIMDE_MUNIT_TEST_ARGS)453 test_simde_mm_adds_epi16(SIMDE_MUNIT_TEST_ARGS) {
454   const struct {
455     simde__m128i a;
456     simde__m128i b;
457     simde__m128i r;
458   } test_vec[8] = {
459     { simde_mm_set_epi16(INT16_C( -9187), INT16_C( 11150), INT16_C(-25711), INT16_C( 30917),
460                          INT16_C(  5637), INT16_C( 27391), INT16_C( 20667), INT16_C(-25552)),
461       simde_mm_set_epi16(INT16_C( 15244), INT16_C(-26000), INT16_C(-24422), INT16_C(-28473),
462                          INT16_C( -7393), INT16_C( 32134), INT16_C(  -161), INT16_C( -2948)),
463       simde_mm_set_epi16(INT16_C(  6057), INT16_C(-14850), INT16_C(-32768), INT16_C(  2444),
464                          INT16_C( -1756), INT16_C( 32767), INT16_C( 20506), INT16_C(-28500)) },
465     { simde_mm_set_epi16(INT16_C(-27976), INT16_C(  8581), INT16_C( 17714), INT16_C(-15964),
466                          INT16_C(-24791), INT16_C( 29014), INT16_C( -8950), INT16_C(-19859)),
467       simde_mm_set_epi16(INT16_C(-20491), INT16_C(-23795), INT16_C(  5770), INT16_C(-28365),
468                          INT16_C( -4266), INT16_C(-14588), INT16_C( 21498), INT16_C( 13063)),
469       simde_mm_set_epi16(INT16_C(-32768), INT16_C(-15214), INT16_C( 23484), INT16_C(-32768),
470                          INT16_C(-29057), INT16_C( 14426), INT16_C( 12548), INT16_C( -6796)) },
471     { simde_mm_set_epi16(INT16_C(-24285), INT16_C(-16974), INT16_C( 21513), INT16_C( 30869),
472                          INT16_C(-30698), INT16_C(  2555), INT16_C(-20742), INT16_C(-26329)),
473       simde_mm_set_epi16(INT16_C( 19660), INT16_C(-27596), INT16_C( 16650), INT16_C( 30694),
474                          INT16_C( 14408), INT16_C(  7632), INT16_C( 15232), INT16_C( -7024)),
475       simde_mm_set_epi16(INT16_C( -4625), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767),
476                          INT16_C(-16290), INT16_C( 10187), INT16_C( -5510), INT16_C(-32768)) },
477     { simde_mm_set_epi16(INT16_C( -6143), INT16_C(  5190), INT16_C(  -240), INT16_C(-14301),
478                          INT16_C( 12856), INT16_C( 32740), INT16_C(-13308), INT16_C( 31639)),
479       simde_mm_set_epi16(INT16_C( 21047), INT16_C(-20544), INT16_C(-28076), INT16_C(-30442),
480                          INT16_C( 28180), INT16_C(-18015), INT16_C( 12870), INT16_C( 12342)),
481       simde_mm_set_epi16(INT16_C( 14904), INT16_C(-15354), INT16_C(-28316), INT16_C(-32768),
482                          INT16_C( 32767), INT16_C( 14725), INT16_C(  -438), INT16_C( 32767)) },
483     { simde_mm_set_epi16(INT16_C( 21004), INT16_C( 26590), INT16_C(  -387), INT16_C(  5458),
484                          INT16_C( 28558), INT16_C( -1691), INT16_C( 13843), INT16_C( -2265)),
485       simde_mm_set_epi16(INT16_C( 24548), INT16_C(-19288), INT16_C(  1056), INT16_C(  5037),
486                          INT16_C(  9790), INT16_C( 12391), INT16_C( -2983), INT16_C(  8158)),
487       simde_mm_set_epi16(INT16_C( 32767), INT16_C(  7302), INT16_C(   669), INT16_C( 10495),
488                          INT16_C( 32767), INT16_C( 10700), INT16_C( 10860), INT16_C(  5893)) },
489     { simde_mm_set_epi16(INT16_C( 23035), INT16_C( 14493), INT16_C( 11060), INT16_C(-15265),
490                          INT16_C(-25751), INT16_C(-17380), INT16_C(-20209), INT16_C(-22539)),
491       simde_mm_set_epi16(INT16_C(-10338), INT16_C( 26220), INT16_C( -6324), INT16_C( 16083),
492                          INT16_C(-20758), INT16_C( 28594), INT16_C(-27719), INT16_C(-21423)),
493       simde_mm_set_epi16(INT16_C( 12697), INT16_C( 32767), INT16_C(  4736), INT16_C(   818),
494                          INT16_C(-32768), INT16_C( 11214), INT16_C(-32768), INT16_C(-32768)) },
495     { simde_mm_set_epi16(INT16_C(  1437), INT16_C( -1148), INT16_C( -7704), INT16_C( -3845),
496                          INT16_C(  5523), INT16_C( 32157), INT16_C( -3057), INT16_C( -2194)),
497       simde_mm_set_epi16(INT16_C( 20255), INT16_C( 16313), INT16_C( 26265), INT16_C( -5377),
498                          INT16_C( 31904), INT16_C(  3795), INT16_C( 20716), INT16_C(-30035)),
499       simde_mm_set_epi16(INT16_C( 21692), INT16_C( 15165), INT16_C( 18561), INT16_C( -9222),
500                          INT16_C( 32767), INT16_C( 32767), INT16_C( 17659), INT16_C(-32229)) },
501     { simde_mm_set_epi16(INT16_C(   856), INT16_C( 13772), INT16_C(-17603), INT16_C(-26424),
502                          INT16_C(  9957), INT16_C(-11801), INT16_C(  3067), INT16_C(-26950)),
503       simde_mm_set_epi16(INT16_C(-26495), INT16_C(-22337), INT16_C(-30714), INT16_C( 24988),
504                          INT16_C(-24287), INT16_C( 11170), INT16_C(-20015), INT16_C( 26834)),
505       simde_mm_set_epi16(INT16_C(-25639), INT16_C( -8565), INT16_C(-32768), INT16_C( -1436),
506                          INT16_C(-14330), INT16_C(  -631), INT16_C(-16948), INT16_C(  -116)) }
507   };
508 
509   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
510     simde__m128i r = simde_mm_adds_epi16(test_vec[i].a, test_vec[i].b);
511     simde_assert_m128i_i16(r, ==, test_vec[i].r);
512   }
513 
514   return 0;
515 }
516 
517 static int
test_simde_mm_adds_epu8(SIMDE_MUNIT_TEST_ARGS)518 test_simde_mm_adds_epu8(SIMDE_MUNIT_TEST_ARGS) {
519   const struct {
520     simde__m128i a;
521     simde__m128i b;
522     simde__m128i r;
523   } test_vec[8] = {
524     { simde_x_mm_set_epu8(243, 185, 175,  84, 114, 173, 219, 130,
525                            80,  12,   6, 121,  58, 223,  94, 203),
526       simde_x_mm_set_epu8( 46, 142,  32,  64, 239,  92, 213, 158,
527                            92,  20,  62, 216,   2, 162,   3, 226),
528       simde_x_mm_set_epu8(255, 255, 207, 148, 255, 255, 255, 255,
529                           172,  32,  68, 255,  60, 255,  97, 255) },
530     { simde_x_mm_set_epu8(200, 115,  63, 101, 233, 139, 164, 230,
531                             4, 147,   7, 233, 110, 206, 178, 233),
532       simde_x_mm_set_epu8( 87,  74,  19, 102, 136, 119, 164, 198,
533                           113, 170, 154,   7, 191, 195, 220, 182),
534       simde_x_mm_set_epu8(255, 189,  82, 203, 255, 255, 255, 255,
535                           117, 255, 161, 240, 255, 255, 255, 255) },
536     { simde_x_mm_set_epu8( 35,  38, 142, 165, 104,  97, 151,   1,
537                            79,  16, 160, 140,  19, 109, 210, 120),
538       simde_x_mm_set_epu8(  2, 202, 138, 112, 199, 233, 201,  65,
539                           233,  49, 101, 216,  62,  35, 235, 214),
540       simde_x_mm_set_epu8( 37, 240, 255, 255, 255, 255, 255,  66,
541                           255,  65, 255, 255,  81, 144, 255, 255) },
542     { simde_x_mm_set_epu8( 98,  74, 253, 101, 187,  74, 205,  52,
543                           154, 226, 198, 148, 241, 174, 125,  62),
544       simde_x_mm_set_epu8(163, 110,   1, 166, 233, 185, 220, 101,
545                           190,  92, 121, 253, 238,  73,  61,  34),
546       simde_x_mm_set_epu8(255, 184, 254, 255, 255, 255, 255, 153,
547                           255, 255, 255, 255, 255, 247, 186,  96) },
548     { simde_x_mm_set_epu8( 91,  28,  52,  18, 175,  61,  49,  67,
549                            76,  39, 238, 247, 137,  91, 133,   4),
550       simde_x_mm_set_epu8(142, 255, 123,  14,  70,  48,  62, 186,
551                           134,  31, 154,  34,   3,  30,  40, 184),
552       simde_x_mm_set_epu8(233, 255, 175,  32, 245, 109, 111, 253,
553                           210,  70, 255, 255, 140, 121, 173, 188) },
554     { simde_x_mm_set_epu8( 32, 230,  94,  17, 123, 186,  43,  67,
555                            13,  45, 219, 214, 133,  19,  25, 150),
556       simde_x_mm_set_epu8(114,  27, 244, 244,  84,   0, 108, 198,
557                           239, 228, 225, 158,   4,  27,  84, 116),
558       simde_x_mm_set_epu8(146, 255, 255, 255, 207, 186, 151, 255,
559                           252, 255, 255, 255, 137,  46, 109, 255) },
560     { simde_x_mm_set_epu8( 66, 152,   8,  32,   7, 222,  46,  10,
561                           116, 185,  69, 186, 194, 134,  55, 214),
562       simde_x_mm_set_epu8(185,  11, 114, 201, 179, 122,  77, 244,
563                           221, 175, 219,  12, 207, 104,  91, 252),
564       simde_x_mm_set_epu8(251, 163, 122, 233, 186, 255, 123, 254,
565                           255, 255, 255, 198, 255, 238, 146, 255) },
566     { simde_x_mm_set_epu8(149,  71,  22, 119,  62,  37, 103,  26,
567                           193,  60, 234, 165,  97, 233, 187,  76),
568       simde_x_mm_set_epu8(169,   9, 188,  18, 251, 187,  96, 167,
569                           158, 238, 176, 160,  74,  18, 253, 103),
570       simde_x_mm_set_epu8(255,  80, 210, 137, 255, 224, 199, 193,
571                           255, 255, 255, 255, 171, 251, 255, 179) }
572   };
573 
574   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
575     simde__m128i r = simde_mm_adds_epu8(test_vec[i].a, test_vec[i].b);
576     simde_assert_m128i_u8(r, ==, test_vec[i].r);
577   }
578 
579   return 0;
580 }
581 
582 static int
test_simde_mm_adds_epu16(SIMDE_MUNIT_TEST_ARGS)583 test_simde_mm_adds_epu16(SIMDE_MUNIT_TEST_ARGS) {
584   const struct {
585     simde__m128i a;
586     simde__m128i b;
587     simde__m128i r;
588   } test_vec[8] = {
589     { simde_x_mm_set_epu16(52397,  2628, 46614, 35162, 54536, 36456, 24004, 11160),
590       simde_x_mm_set_epu16(41921, 12035, 29903, 58497,  1695, 15558, 14248, 61659),
591       simde_x_mm_set_epu16(65535, 14663, 65535, 65535, 56231, 52014, 38252, 65535) },
592     { simde_x_mm_set_epu16(57345,  6650,  8556, 25986, 61163, 19076, 40550, 40920),
593       simde_x_mm_set_epu16(62607, 15369, 35325, 28241, 54252,  5722, 23748, 36984),
594       simde_x_mm_set_epu16(65535, 22019, 43881, 54227, 65535, 24798, 64298, 65535) },
595     { simde_x_mm_set_epu16(19370, 64323,  5781, 65431, 30915, 24348, 65190, 30074),
596       simde_x_mm_set_epu16(34245, 57703, 60540, 40683, 24154, 18750, 32124, 33828),
597       simde_x_mm_set_epu16(53615, 65535, 65535, 65535, 55069, 43098, 65535, 63902) },
598     { simde_x_mm_set_epu16( 1083, 62410, 53296,    45, 57969, 54778, 42038, 36216),
599       simde_x_mm_set_epu16(47446, 36131, 44258, 13796, 53696, 55457, 27279, 19924),
600       simde_x_mm_set_epu16(48529, 65535, 65535, 13841, 65535, 65535, 65535, 56140) },
601     { simde_x_mm_set_epu16(53022, 40173, 23284, 53830, 27939, 30100, 61471,   602),
602       simde_x_mm_set_epu16(42952, 36449, 22644,  6670,   537,  5689,    73,  2247),
603       simde_x_mm_set_epu16(65535, 65535, 45928, 60500, 28476, 35789, 61544,  2849) },
604     { simde_x_mm_set_epu16( 8441, 24815, 22801, 35056, 30653,  5655, 39135, 32848),
605       simde_x_mm_set_epu16( 7115, 32196, 31449, 51212, 54481,  9348, 63499, 54202),
606       simde_x_mm_set_epu16(15556, 57011, 54250, 65535, 65535, 15003, 65535, 65535) },
607     { simde_x_mm_set_epu16( 5059, 20924,  5143, 29698, 39512, 42596, 50907, 48157),
608       simde_x_mm_set_epu16(55259, 30633, 10948, 60956, 47288, 59136, 49334, 11432),
609       simde_x_mm_set_epu16(60318, 51557, 16091, 65535, 65535, 65535, 65535, 59589) },
610     { simde_x_mm_set_epu16(53397,  1584, 56368, 64962, 35166, 11367, 24855, 22370),
611       simde_x_mm_set_epu16( 5862,  9719, 15493, 14762, 25151, 48370, 30737, 29969),
612       simde_x_mm_set_epu16(59259, 11303, 65535, 65535, 60317, 59737, 55592, 52339) }
613   };
614 
615   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
616     simde__m128i r = simde_mm_adds_epu16(test_vec[i].a, test_vec[i].b);
617     simde_assert_m128i_u16(r, ==, test_vec[i].r);
618   }
619 
620   return 0;
621 }
622 
623 static int
test_simde_mm_and_pd(SIMDE_MUNIT_TEST_ARGS)624 test_simde_mm_and_pd(SIMDE_MUNIT_TEST_ARGS) {
625   simde__m128d
626     all_set = simde_x_mm_setone_pd(),
627     all_unset = simde_mm_setzero_pd();
628 
629   simde_assert_m128d_equal(simde_mm_and_pd(all_set, all_unset), all_unset);
630   simde_assert_m128d_equal(simde_mm_and_pd(all_set, all_set), all_set);
631   simde_assert_m128d_equal(simde_mm_and_pd(all_unset, all_unset), all_unset);
632 
633   return 0;
634 }
635 
636 static int
test_simde_mm_and_si128(SIMDE_MUNIT_TEST_ARGS)637 test_simde_mm_and_si128(SIMDE_MUNIT_TEST_ARGS) {
638   const struct {
639     simde__m128i a;
640     simde__m128i b;
641     simde__m128i r;
642   } test_vec[8] = {
643     { simde_mm_set_epi32(INT32_C( 1143386005), INT32_C(  255040004), INT32_C(  778825143), INT32_C( 1160880262)),
644       simde_mm_set_epi32(INT32_C(-1598010564), INT32_C(  882577136), INT32_C( 1895747884), INT32_C(   78458499)),
645       simde_mm_set_epi32(INT32_C(       2836), INT32_C(   68356608), INT32_C(  543867172), INT32_C(   69279874)) },
646     { simde_mm_set_epi32(INT32_C(  929630839), INT32_C( 1332223012), INT32_C( -595247247), INT32_C( 1607004091)),
647       simde_mm_set_epi32(INT32_C(  -96984995), INT32_C( -496201158), INT32_C( 1667897198), INT32_C(  329068048)),
648       simde_mm_set_epi32(INT32_C(  841482325), INT32_C( 1114116128), INT32_C( 1073747808), INT32_C(  327691792)) },
649     { simde_mm_set_epi32(INT32_C( 1507410371), INT32_C(-1202228125), INT32_C(  213174798), INT32_C( 1712466479)),
650       simde_mm_set_epi32(INT32_C( -416935364), INT32_C(   76821686), INT32_C( -895281725), INT32_C(  640856929)),
651       simde_mm_set_epi32(INT32_C( 1090519040), INT32_C(    1324066), INT32_C(  144703490), INT32_C(  638722593)) },
652     { simde_mm_set_epi32(INT32_C(-1967400648), INT32_C( -398277023), INT32_C( 1276094966), INT32_C(-1580835262)),
653       simde_mm_set_epi32(INT32_C(  185492863), INT32_C( 1265367516), INT32_C( -384438464), INT32_C( 1008626379)),
654       simde_mm_set_epi32(INT32_C(  168444216), INT32_C( 1212334144), INT32_C( 1208330560), INT32_C(  537283138)) },
655     { simde_mm_set_epi32(INT32_C( 1287640091), INT32_C( -654000828), INT32_C(  597524546), INT32_C(  182360913)),
656       simde_mm_set_epi32(INT32_C( 1236330411), INT32_C( 1010510657), INT32_C(-1874705697), INT32_C( -544222805)),
657       simde_mm_set_epi32(INT32_C( 1219544075), INT32_C(  402663744), INT32_C(         66), INT32_C(  177115393)) },
658     { simde_mm_set_epi32(INT32_C( -783740762), INT32_C( 1592969400), INT32_C(-1896275639), INT32_C( 1398555518)),
659       simde_mm_set_epi32(INT32_C(  618146080), INT32_C( -972493969), INT32_C( -440292799), INT32_C(  888342397)),
660       simde_mm_set_epi32(INT32_C(    4718624), INT32_C( 1174456360), INT32_C(-2067718079), INT32_C(  273679228)) },
661     { simde_mm_set_epi32(INT32_C(  975551520), INT32_C(  223749592), INT32_C(-1022254731), INT32_C( -845311996)),
662       simde_mm_set_epi32(INT32_C( 1522650069), INT32_C( 1767255815), INT32_C( 1217271913), INT32_C(-1365644996)),
663       simde_mm_set_epi32(INT32_C(  436307968), INT32_C(  156640512), INT32_C( 1073741921), INT32_C(-1936097276)) },
664     { simde_mm_set_epi32(INT32_C(-1607852092), INT32_C( -146112938), INT32_C(  112326370), INT32_C(  971940993)),
665       simde_mm_set_epi32(INT32_C( 1129446249), INT32_C( -367605030), INT32_C( 2031327443), INT32_C( -763011289)),
666       simde_mm_set_epi32(INT32_C(       6976), INT32_C( -503166382), INT32_C(    1151170), INT32_C(  277087233)) }
667   };
668 
669   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
670     simde__m128i r = simde_mm_and_si128(test_vec[i].a, test_vec[i].b);
671     simde_assert_m128i_i8(r, ==, test_vec[i].r);
672   }
673 
674   return 0;
675 }
676 
677 static int
test_simde_mm_andnot_pd(SIMDE_MUNIT_TEST_ARGS)678 test_simde_mm_andnot_pd (SIMDE_MUNIT_TEST_ARGS) {
679   static const struct {
680     const int64_t a[2];
681     const int64_t b[2];
682     const int64_t r[2];
683   } test_vec[] = {
684     { { -INT64_C( 2301068032488183266),  INT64_C( 2211418302004999322) },
685       { -INT64_C( 6496218963460796338), -INT64_C( 1823812953617724359) },
686       {  INT64_C(  416592882749870144), -INT64_C( 2305839985119459295) } },
687     { { -INT64_C( 4026537826130773906), -INT64_C( 4293586900818793454) },
688       {  INT64_C( 3627658264586431853),  INT64_C( 2793073689318142995) },
689       {  INT64_C( 3620897194948822273),  INT64_C( 2486233871494942721) } },
690     { {  INT64_C( 9163842748139474741),  INT64_C( 2569644122047224175) },
691       { -INT64_C(  969348282954885022),  INT64_C(  811088657167341923) },
692       { -INT64_C( 9187294233813168062),  INT64_C(  594616163615653888) } },
693     { { -INT64_C( 7634837049602759393),  INT64_C( 7858258033422095925) },
694       {  INT64_C( 8560872385946379772),  INT64_C( 5408772763975523373) },
695       {  INT64_C( 6972768561940938976),  INT64_C(  144627715121612808) } },
696     { {  INT64_C( 5707717806252392055), -INT64_C( 3300212919446621766) },
697       {  INT64_C( 9059126230790306606), -INT64_C( 7813869476910184169) },
698       {  INT64_C( 3497071796361199880),  INT64_C(  111611459737241605) } },
699     { {  INT64_C(  322122041068250894),  INT64_C( 3008683809568371225) },
700       {  INT64_C(  127266174305791736), -INT64_C( 2153613612017236628) },
701       {  INT64_C(  109216282184321264), -INT64_C( 4459687314800229020) } },
702     { { -INT64_C( 3492115216109711814),  INT64_C( 5213618401531810613) },
703       {  INT64_C( 6185505972225623532),  INT64_C( 2977154160444382105) },
704       {  INT64_C( 1177221022203544004),  INT64_C( 2378043193183766664) } },
705     { {  INT64_C( 1499198256367688520), -INT64_C(  638210378185732981) },
706       { -INT64_C( 6727634533471112091), -INT64_C( 4478722654180704065) },
707       { -INT64_C( 6764261500459572187),  INT64_C(   60904633988546612) } }
708   };
709 
710   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
711     simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a);
712     simde__m128i b = simde_x_mm_loadu_epi64(test_vec[i].b);
713     simde__m128i r = simde_mm_castpd_si128(simde_mm_andnot_pd(simde_mm_castsi128_pd(a), simde_mm_castsi128_pd(b)));
714     simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r));
715   }
716 
717   return 0;
718 }
719 
720 static int
test_simde_mm_andnot_si128(SIMDE_MUNIT_TEST_ARGS)721 test_simde_mm_andnot_si128(SIMDE_MUNIT_TEST_ARGS) {
722   const struct {
723     simde__m128i a;
724     simde__m128i b;
725     simde__m128i r;
726   } test_vec[8] = {
727     { simde_mm_set_epi32(INT32_C( -560808079), INT32_C( -229809400), INT32_C(-1262424809), INT32_C(   39478984)),
728       simde_mm_set_epi32(INT32_C(-2116907800), INT32_C( -478633369), INT32_C( -758500702), INT32_C(  774827765)),
729       simde_mm_set_epi32(INT32_C(   20971656), INT32_C(   19955815), INT32_C( 1107956384), INT32_C(  740591669)) },
730     { simde_mm_set_epi32(INT32_C( -390101732), INT32_C( -878593643), INT32_C(  -87858932), INT32_C( 1872700566)),
731       simde_mm_set_epi32(INT32_C(  794830631), INT32_C( 1201718915), INT32_C( 1477008088), INT32_C( -178127418)),
732       simde_mm_set_epi32(INT32_C(  121643555), INT32_C(   67126786), INT32_C(     531152), INT32_C(-1872701120)) },
733     { simde_mm_set_epi32(INT32_C( -969785513), INT32_C(  743154241), INT32_C( -944974936), INT32_C(-1136592248)),
734       simde_mm_set_epi32(INT32_C( -909998602), INT32_C(  431643866), INT32_C( -708589890), INT32_C( -556429363)),
735       simde_mm_set_epi32(INT32_C(  163610784), INT32_C(  296770714), INT32_C(  272827414), INT32_C( 1117062469)) },
736     { simde_mm_set_epi32(INT32_C( 1619650408), INT32_C(  861525694), INT32_C(-2058207417), INT32_C(  228720218)),
737       simde_mm_set_epi32(INT32_C( 1416821078), INT32_C( 2107001565), INT32_C(-1248448269), INT32_C(-1204471361)),
738       simde_mm_set_epi32(INT32_C(  343021590), INT32_C( 1283852353), INT32_C(  813957296), INT32_C(-1340866139)) },
739     { simde_mm_set_epi32(INT32_C( -343490394), INT32_C( 1846187115), INT32_C( -847771260), INT32_C(   97935165)),
740       simde_mm_set_epi32(INT32_C(  -69489865), INT32_C(-1109591795), INT32_C(  169478308), INT32_C( 1662522631)),
741       simde_mm_set_epi32(INT32_C(  274279185), INT32_C(-1848350460), INT32_C(   33685536), INT32_C( 1644691458)) },
742     { simde_mm_set_epi32(INT32_C(  608096731), INT32_C( -775399847), INT32_C(  -52780990), INT32_C(  459462722)),
743       simde_mm_set_epi32(INT32_C(-1928888486), INT32_C(-1926941714), INT32_C(-1218438233), INT32_C(  195273416)),
744       simde_mm_set_epi32(INT32_C(-1996411392), INT32_C(  203760038), INT32_C(   52435877), INT32_C(    8462984)) },
745     { simde_mm_set_epi32(INT32_C( 1829801526), INT32_C( 1678890728), INT32_C(-1629742565), INT32_C(  902941266)),
746       simde_mm_set_epi32(INT32_C(  110066513), INT32_C( -591553870), INT32_C( -950259417), INT32_C(  810403185)),
747       simde_mm_set_epi32(INT32_C(   42955073), INT32_C(-1733556206), INT32_C( 1090527524), INT32_C(     787745)) },
748     { simde_mm_set_epi32(INT32_C(  321441431), INT32_C(-1200267660), INT32_C( -313751420), INT32_C(  515761953)),
749       simde_mm_set_epi32(INT32_C( -687838781), INT32_C( 1420638186), INT32_C(-1442242179), INT32_C( 1996838037)),
750       simde_mm_set_epi32(INT32_C(-1006624448), INT32_C( 1149772170), INT32_C(   33628537), INT32_C( 1627394196)) }
751   };
752 
753   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
754     simde__m128i r = simde_mm_andnot_si128(test_vec[i].a, test_vec[i].b);
755     simde_assert_m128i_i8(r, ==, test_vec[i].r);
756   }
757 
758   return 0;
759 }
760 
761 static int
test_simde_mm_avg_epu8(SIMDE_MUNIT_TEST_ARGS)762 test_simde_mm_avg_epu8(SIMDE_MUNIT_TEST_ARGS) {
763   const struct {
764     simde__m128i a;
765     simde__m128i b;
766     simde__m128i r;
767   } test_vec[8] = {
768     { simde_x_mm_set_epu8( 22, 218, 216,  66,  82, 156,  47,  52,
769                           255,   1, 136, 174, 147, 136, 106, 178),
770       simde_x_mm_set_epu8( 66, 241, 223, 129,  96,  67,   0,  68,
771                           242,  71, 233, 224, 227, 252, 191,  92),
772       simde_x_mm_set_epu8( 44, 230, 220,  98,  89, 112,  24,  60,
773                           249,  36, 185, 199, 187, 194, 149, 135) },
774     { simde_x_mm_set_epu8(219, 214,  26,  72,  63,  56, 200, 118,
775                           196, 107,  88, 110, 187,   3,  64, 214),
776       simde_x_mm_set_epu8( 44, 175, 103,  82,  87, 192, 180,  37,
777                             0, 200,  53, 214,  25,  17,  19, 149),
778       simde_x_mm_set_epu8(132, 195,  65,  77,  75, 124, 190,  78,
779                            98, 154,  71, 162, 106,  10,  42, 182) },
780     { simde_x_mm_set_epu8(221,   9, 162, 208,  84,  84,  50, 140,
781                           230,  69, 178,  12,  34, 173,  44,  58),
782       simde_x_mm_set_epu8(  4, 110,  65, 218, 252, 108, 241, 136,
783                            36, 109,  68,   2, 121,  10, 120, 101),
784       simde_x_mm_set_epu8(113,  60, 114, 213, 168,  96, 146, 138,
785                           133,  89, 123,   7,  78,  92,  82,  80) },
786     { simde_x_mm_set_epu8(173,  38,  26, 251,  66, 136, 168, 132,
787                           170, 244, 145,  27,  76, 168,  97, 129),
788       simde_x_mm_set_epu8(211,  66,  29,  93, 231,  30, 149, 218,
789                            72,  12, 231, 238, 124,   3, 127,  55),
790       simde_x_mm_set_epu8(192,  52,  28, 172, 149,  83, 159, 175,
791                           121, 128, 188, 133, 100,  86, 112,  92) },
792     { simde_x_mm_set_epu8( 33, 120,  41,   4, 226,  71, 169,  72,
793                            92, 211,  80,  53,  22, 250, 136,  31),
794       simde_x_mm_set_epu8(163, 237, 214, 178,  29, 194, 137, 109,
795                           134, 197,  40, 228, 174, 101, 114, 162),
796       simde_x_mm_set_epu8( 98, 179, 128,  91, 128, 133, 153,  91,
797                           113, 204,  60, 141,  98, 176, 125,  97) },
798     { simde_x_mm_set_epu8(151, 241,  42,  96,  21, 167,  26, 188,
799                           124, 136, 158, 144, 227, 152,   4, 152),
800       simde_x_mm_set_epu8( 43, 216,  77, 147, 105, 127,  87,  93,
801                           160, 103,  68,  85,  77,  41,  67, 189),
802       simde_x_mm_set_epu8( 97, 229,  60, 122,  63, 147,  57, 141,
803                           142, 120, 113, 115, 152,  97,  36, 171) },
804     { simde_x_mm_set_epu8(229, 241,   5, 141,  89,  37, 175, 184,
805                           139, 113,  20, 221, 179, 130,  61,  16),
806       simde_x_mm_set_epu8( 74,  70, 240, 235, 217, 244,  23, 139,
807                           224,  48, 224, 137, 221, 180, 178,  80),
808       simde_x_mm_set_epu8(152, 156, 123, 188, 153, 141,  99, 162,
809                           182,  81, 122, 179, 200, 155, 120,  48) },
810     { simde_x_mm_set_epu8( 30,  40, 139,  23, 169,  60,  77, 114,
811                            84,  55,  70, 122,  10,  27,  47, 237),
812       simde_x_mm_set_epu8(133, 159, 246, 175, 239, 136, 111, 216,
813                           173,  32, 117,  64, 231, 128, 162, 145),
814       simde_x_mm_set_epu8( 82, 100, 193,  99, 204,  98,  94, 165,
815                           129,  44,  94,  93, 121,  78, 105, 191) }
816   };
817 
818   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
819     simde__m128i r = simde_mm_avg_epu8(test_vec[i].a, test_vec[i].b);
820     simde_assert_m128i_u8(r, ==, test_vec[i].r);
821   }
822 
823   return 0;
824 }
825 
826 static int
test_simde_mm_avg_epu16(SIMDE_MUNIT_TEST_ARGS)827 test_simde_mm_avg_epu16(SIMDE_MUNIT_TEST_ARGS) {
828   const struct {
829     simde__m128i a;
830     simde__m128i b;
831     simde__m128i r;
832   } test_vec[8] = {
833     { simde_x_mm_set_epu16( 5850, 55362, 21148, 12084, 65281, 34990, 37768, 27314),
834       simde_x_mm_set_epu16(17137, 57217, 24643,    68, 62023, 59872, 58364, 48988),
835       simde_x_mm_set_epu16(11494, 56290, 22896,  6076, 63652, 47431, 48066, 38151) },
836     { simde_x_mm_set_epu16(56278,  6728, 16184, 51318, 50283, 22638, 47875, 16598),
837       simde_x_mm_set_epu16(11439, 26450, 22464, 46117,   200, 13782,  6417,  5013),
838       simde_x_mm_set_epu16(33859, 16589, 19324, 48718, 25242, 18210, 27146, 10806) },
839     { simde_x_mm_set_epu16(56585, 41680, 21588, 12940, 58949, 45580,  8877, 11322),
840       simde_x_mm_set_epu16( 1134, 16858, 64620, 61832,  9325, 17410, 30986, 30821),
841       simde_x_mm_set_epu16(28860, 29269, 43104, 37386, 34137, 31495, 19932, 21072) },
842     { simde_x_mm_set_epu16(44326,  6907, 17032, 43140, 43764, 37147, 19624, 24961),
843       simde_x_mm_set_epu16(54082,  7517, 59166, 38362, 18444, 59374, 31747, 32567),
844       simde_x_mm_set_epu16(49204,  7212, 38099, 40751, 31104, 48261, 25686, 28764) },
845     { simde_x_mm_set_epu16( 8568, 10500, 57927, 43336, 23763, 20533,  5882, 34847),
846       simde_x_mm_set_epu16(41965, 54962,  7618, 35181, 34501, 10468, 44645, 29346),
847       simde_x_mm_set_epu16(25267, 32731, 32773, 39259, 29132, 15501, 25264, 32097) },
848     { simde_x_mm_set_epu16(38897, 10848,  5543,  6844, 31880, 40592, 58264,  1176),
849       simde_x_mm_set_epu16(11224, 19859, 27007, 22365, 41063, 17493, 19753, 17341),
850       simde_x_mm_set_epu16(25061, 15354, 16275, 14605, 36472, 29043, 39009,  9259) },
851     { simde_x_mm_set_epu16(58865,  1421, 22821, 44984, 35697,  5341, 45954, 15632),
852       simde_x_mm_set_epu16(19014, 61675, 55796,  6027, 57392, 57481, 56756, 45648),
853       simde_x_mm_set_epu16(38940, 31548, 39309, 25506, 46545, 31411, 51355, 30640) },
854     { simde_x_mm_set_epu16( 7720, 35607, 43324, 19826, 21559, 18042,  2587, 12269),
855       simde_x_mm_set_epu16(34207, 63151, 61320, 28632, 44320, 30016, 59264, 41617),
856       simde_x_mm_set_epu16(20964, 49379, 52322, 24229, 32940, 24029, 30926, 26943) }
857   };
858 
859   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
860     simde__m128i r = simde_mm_avg_epu16(test_vec[i].a, test_vec[i].b);
861     simde_assert_m128i_u16(r, ==, test_vec[i].r);
862   }
863 
864   return 0;
865 }
866 
867 static int
test_simde_mm_bslli_si128(SIMDE_MUNIT_TEST_ARGS)868 test_simde_mm_bslli_si128(SIMDE_MUNIT_TEST_ARGS) {
869   simde__m128i a, e, r;
870 
871   a = simde_mm_set_epi8(INT8_C(  24), INT8_C( -55), INT8_C( -96), INT8_C(  87),
872                         INT8_C( -58), INT8_C(-112), INT8_C(  23), INT8_C(-126),
873                         INT8_C(  -8), INT8_C( -11), INT8_C(  18), INT8_C(  30),
874                         INT8_C( 114), INT8_C(  65), INT8_C(  26), INT8_C(-121));
875 
876   e = simde_mm_set_epi8(INT8_C(  24), INT8_C( -55), INT8_C( -96), INT8_C(  87),
877                         INT8_C( -58), INT8_C(-112), INT8_C(  23), INT8_C(-126),
878                         INT8_C(  -8), INT8_C( -11), INT8_C(  18), INT8_C(  30),
879                         INT8_C( 114), INT8_C(  65), INT8_C(  26), INT8_C(-121));
880   r = simde_mm_bslli_si128(a, 0);
881   simde_assert_m128i_i8(r, ==, e);
882 
883   e = simde_mm_set_epi8(INT8_C(  87), INT8_C( -58), INT8_C(-112), INT8_C(  23),
884                         INT8_C(-126), INT8_C(  -8), INT8_C( -11), INT8_C(  18),
885                         INT8_C(  30), INT8_C( 114), INT8_C(  65), INT8_C(  26),
886                         INT8_C(-121), INT8_C(   0), INT8_C(   0), INT8_C(   0));
887   r = simde_mm_bslli_si128(a, 3);
888   simde_assert_m128i_i8(r, ==, e);
889 
890   e = simde_mm_set_epi8(INT8_C(  65), INT8_C(  26), INT8_C(-121), INT8_C(   0),
891                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
892                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
893                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0));
894   r = simde_mm_bslli_si128(a, 13);
895   simde_assert_m128i_i8(r, ==, e);
896 
897   e = simde_mm_set_epi8(INT8_C( -96), INT8_C(  87), INT8_C( -58), INT8_C(-112),
898                         INT8_C(  23), INT8_C(-126), INT8_C(  -8), INT8_C( -11),
899                         INT8_C(  18), INT8_C(  30), INT8_C( 114), INT8_C(  65),
900                         INT8_C(  26), INT8_C(-121), INT8_C(   0), INT8_C(   0));
901   r = simde_mm_bslli_si128(a, 2);
902   simde_assert_m128i_i8(r, ==, e);
903 
904   e = simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
905                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
906                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
907                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0));
908   r = simde_mm_bslli_si128(a, 19);
909   simde_assert_m128i_i8(r, ==, e);
910 
911   return 0;
912 }
913 
914 static int
test_simde_mm_bsrli_si128(SIMDE_MUNIT_TEST_ARGS)915 test_simde_mm_bsrli_si128(SIMDE_MUNIT_TEST_ARGS) {
916   simde__m128i a, e, r;
917 
918   a = simde_mm_set_epi8(INT8_C(-121), INT8_C( -58), INT8_C( -15), INT8_C(-115),
919                         INT8_C( -97), INT8_C( -96), INT8_C( -74), INT8_C(-113),
920                         INT8_C(-121), INT8_C(  99), INT8_C( 126), INT8_C( 113),
921                         INT8_C( -29), INT8_C( 114), INT8_C( -65), INT8_C(   9));
922 
923   e = simde_mm_set_epi8(INT8_C(-121), INT8_C( -58), INT8_C( -15), INT8_C(-115),
924                         INT8_C( -97), INT8_C( -96), INT8_C( -74), INT8_C(-113),
925                         INT8_C(-121), INT8_C(  99), INT8_C( 126), INT8_C( 113),
926                         INT8_C( -29), INT8_C( 114), INT8_C( -65), INT8_C(   9));
927   r = simde_mm_bsrli_si128(a, 0);
928   simde_assert_m128i_i8(r, ==, e);
929 
930   e = simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(-121),
931                         INT8_C( -58), INT8_C( -15), INT8_C(-115), INT8_C( -97),
932                         INT8_C( -96), INT8_C( -74), INT8_C(-113), INT8_C(-121),
933                         INT8_C(  99), INT8_C( 126), INT8_C( 113), INT8_C( -29));
934   r = simde_mm_bsrli_si128(a, 3);
935   simde_assert_m128i_i8(r, ==, e);
936 
937   e = simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
938                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
939                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
940                         INT8_C(   0), INT8_C(-121), INT8_C( -58), INT8_C( -15));
941   r = simde_mm_bsrli_si128(a, 13);
942   simde_assert_m128i_i8(r, ==, e);
943 
944   e = simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(-121), INT8_C( -58),
945                         INT8_C( -15), INT8_C(-115), INT8_C( -97), INT8_C( -96),
946                         INT8_C( -74), INT8_C(-113), INT8_C(-121), INT8_C(  99),
947                         INT8_C( 126), INT8_C( 113), INT8_C( -29), INT8_C( 114));
948   r = simde_mm_bsrli_si128(a, 2);
949   simde_assert_m128i_i8(r, ==, e);
950 
951   e = simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
952                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
953                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
954                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0));
955   r = simde_mm_bsrli_si128(a, 19);
956   simde_assert_m128i_i8(r, ==, e);
957 
958   return 0;
959 }
960 
961 static int
test_simde_mm_castpd_ps(SIMDE_MUNIT_TEST_ARGS)962 test_simde_mm_castpd_ps(SIMDE_MUNIT_TEST_ARGS) {
963   const struct {
964     simde__m128d a;
965     simde__m128 r;
966   } test_vec[8] = {
967     { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C( 499.48), SIMDE_FLOAT32_C(  72.83), SIMDE_FLOAT32_C(-420.10), SIMDE_FLOAT32_C( -361.15))),
968       simde_mm_set_ps(SIMDE_FLOAT32_C( 499.48), SIMDE_FLOAT32_C(  72.83), SIMDE_FLOAT32_C(-420.10), SIMDE_FLOAT32_C( -361.15)) },
969     { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C(-412.40), SIMDE_FLOAT32_C(-314.35), SIMDE_FLOAT32_C(-851.03), SIMDE_FLOAT32_C(  128.01))),
970       simde_mm_set_ps(SIMDE_FLOAT32_C(-412.40), SIMDE_FLOAT32_C(-314.35), SIMDE_FLOAT32_C(-851.03), SIMDE_FLOAT32_C(  128.01)) },
971     { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C(-411.93), SIMDE_FLOAT32_C( 780.67), SIMDE_FLOAT32_C(-928.22), SIMDE_FLOAT32_C(  762.24))),
972       simde_mm_set_ps(SIMDE_FLOAT32_C(-411.93), SIMDE_FLOAT32_C( 780.67), SIMDE_FLOAT32_C(-928.22), SIMDE_FLOAT32_C(  762.24)) },
973     { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C(-614.18), SIMDE_FLOAT32_C( 644.19), SIMDE_FLOAT32_C( -41.15), SIMDE_FLOAT32_C(  871.68))),
974       simde_mm_set_ps(SIMDE_FLOAT32_C(-614.18), SIMDE_FLOAT32_C( 644.19), SIMDE_FLOAT32_C( -41.15), SIMDE_FLOAT32_C(  871.68)) },
975     { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C( 795.82), SIMDE_FLOAT32_C( 486.26), SIMDE_FLOAT32_C(-686.59), SIMDE_FLOAT32_C(  277.69))),
976       simde_mm_set_ps(SIMDE_FLOAT32_C( 795.82), SIMDE_FLOAT32_C( 486.26), SIMDE_FLOAT32_C(-686.59), SIMDE_FLOAT32_C(  277.69)) },
977     { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C( 221.74), SIMDE_FLOAT32_C(-655.22), SIMDE_FLOAT32_C(-366.90), SIMDE_FLOAT32_C( -245.25))),
978       simde_mm_set_ps(SIMDE_FLOAT32_C( 221.74), SIMDE_FLOAT32_C(-655.22), SIMDE_FLOAT32_C(-366.90), SIMDE_FLOAT32_C( -245.25)) },
979     { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C( -83.75), SIMDE_FLOAT32_C( 862.26), SIMDE_FLOAT32_C(  55.37), SIMDE_FLOAT32_C(  -26.83))),
980       simde_mm_set_ps(SIMDE_FLOAT32_C( -83.75), SIMDE_FLOAT32_C( 862.26), SIMDE_FLOAT32_C(  55.37), SIMDE_FLOAT32_C(  -26.83)) },
981     { simde_mm_castps_pd(simde_mm_set_ps(SIMDE_FLOAT32_C(-557.26), SIMDE_FLOAT32_C(-554.56), SIMDE_FLOAT32_C(-507.07), SIMDE_FLOAT32_C(  395.47))),
982       simde_mm_set_ps(SIMDE_FLOAT32_C(-557.26), SIMDE_FLOAT32_C(-554.56), SIMDE_FLOAT32_C(-507.07), SIMDE_FLOAT32_C(  395.47)) }
983   };
984 
985   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
986     simde__m128 r = simde_mm_castpd_ps(test_vec[i].a);
987     simde_assert_m128_close(r, test_vec[i].r, 1);
988   }
989 
990   return 0;
991 }
992 
993 static int
test_simde_mm_castps_pd(SIMDE_MUNIT_TEST_ARGS)994 test_simde_mm_castps_pd(SIMDE_MUNIT_TEST_ARGS) {
995   const struct {
996     simde__m128 a;
997     simde__m128 r;
998   } test_vec[8] = {
999     { simde_mm_set_ps(SIMDE_FLOAT32_C(   -48.11), SIMDE_FLOAT32_C(    66.54), SIMDE_FLOAT32_C(  -702.38), SIMDE_FLOAT32_C(  -384.97)),
1000       simde_mm_set_ps(SIMDE_FLOAT32_C(   -48.11), SIMDE_FLOAT32_C(    66.54), SIMDE_FLOAT32_C(  -702.38), SIMDE_FLOAT32_C(  -384.97)) },
1001     { simde_mm_set_ps(SIMDE_FLOAT32_C(   975.22), SIMDE_FLOAT32_C(  -352.19), SIMDE_FLOAT32_C(  -258.03), SIMDE_FLOAT32_C(   978.92)),
1002       simde_mm_set_ps(SIMDE_FLOAT32_C(   975.22), SIMDE_FLOAT32_C(  -352.19), SIMDE_FLOAT32_C(  -258.03), SIMDE_FLOAT32_C(   978.92)) },
1003     { simde_mm_set_ps(SIMDE_FLOAT32_C(    24.73), SIMDE_FLOAT32_C(  -551.11), SIMDE_FLOAT32_C(   -52.52), SIMDE_FLOAT32_C(   259.60)),
1004       simde_mm_set_ps(SIMDE_FLOAT32_C(    24.73), SIMDE_FLOAT32_C(  -551.11), SIMDE_FLOAT32_C(   -52.52), SIMDE_FLOAT32_C(   259.60)) },
1005     { simde_mm_set_ps(SIMDE_FLOAT32_C(   614.82), SIMDE_FLOAT32_C(   711.79), SIMDE_FLOAT32_C(   715.74), SIMDE_FLOAT32_C(   872.89)),
1006       simde_mm_set_ps(SIMDE_FLOAT32_C(   614.82), SIMDE_FLOAT32_C(   711.79), SIMDE_FLOAT32_C(   715.74), SIMDE_FLOAT32_C(   872.89)) },
1007     { simde_mm_set_ps(SIMDE_FLOAT32_C(   434.09), SIMDE_FLOAT32_C(    97.43), SIMDE_FLOAT32_C(   836.69), SIMDE_FLOAT32_C(   490.93)),
1008       simde_mm_set_ps(SIMDE_FLOAT32_C(   434.09), SIMDE_FLOAT32_C(    97.43), SIMDE_FLOAT32_C(   836.69), SIMDE_FLOAT32_C(   490.93)) },
1009     { simde_mm_set_ps(SIMDE_FLOAT32_C(  -964.09), SIMDE_FLOAT32_C(   616.34), SIMDE_FLOAT32_C(  -267.39), SIMDE_FLOAT32_C(  -457.57)),
1010       simde_mm_set_ps(SIMDE_FLOAT32_C(  -964.09), SIMDE_FLOAT32_C(   616.34), SIMDE_FLOAT32_C(  -267.39), SIMDE_FLOAT32_C(  -457.57)) },
1011     { simde_mm_set_ps(SIMDE_FLOAT32_C(   -14.24), SIMDE_FLOAT32_C(   802.19), SIMDE_FLOAT32_C(   741.42), SIMDE_FLOAT32_C(  -211.48)),
1012       simde_mm_set_ps(SIMDE_FLOAT32_C(   -14.24), SIMDE_FLOAT32_C(   802.19), SIMDE_FLOAT32_C(   741.42), SIMDE_FLOAT32_C(  -211.48)) },
1013     { simde_mm_set_ps(SIMDE_FLOAT32_C(  -739.71), SIMDE_FLOAT32_C(  -918.58), SIMDE_FLOAT32_C(  -598.92), SIMDE_FLOAT32_C(  -924.03)),
1014       simde_mm_set_ps(SIMDE_FLOAT32_C(  -739.71), SIMDE_FLOAT32_C(  -918.58), SIMDE_FLOAT32_C(  -598.92), SIMDE_FLOAT32_C(  -924.03)) }
1015   };
1016 
1017   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1018     simde__m128 r = simde_mm_castpd_ps(simde_mm_castps_pd(test_vec[i].a));
1019     simde_assert_m128_close(r, test_vec[i].r, 1);
1020   }
1021 
1022   return 0;
1023 }
1024 
1025 static int
test_simde_mm_castsi128_pd(SIMDE_MUNIT_TEST_ARGS)1026 test_simde_mm_castsi128_pd(SIMDE_MUNIT_TEST_ARGS) {
1027   simde__m128i test_vec[8] = {
1028     simde_mm_set_epi32(INT32_C(-1784593785), INT32_C( 1037253725), INT32_C(  225827038), INT32_C(-2070942389)),
1029     simde_mm_set_epi32(INT32_C( 2006039830), INT32_C(  831495128), INT32_C( 1875760759), INT32_C(  315081037)),
1030     simde_mm_set_epi32(INT32_C( -305750616), INT32_C(  602617399), INT32_C( 1569354160), INT32_C(-1091905770)),
1031     simde_mm_set_epi32(INT32_C(-1852218105), INT32_C(-1464694454), INT32_C(-1287612023), INT32_C( 1418106957)),
1032     simde_mm_set_epi32(INT32_C( 1382189486), INT32_C(  561466363), INT32_C( -455563445), INT32_C(  733917325)),
1033     simde_mm_set_epi32(INT32_C( -187102213), INT32_C( -373894547), INT32_C(  335417846), INT32_C(  400855569)),
1034     simde_mm_set_epi32(INT32_C( 1405293845), INT32_C( -164981292), INT32_C(  180491437), INT32_C( 1551867928)),
1035     simde_mm_set_epi32(INT32_C(  458893421), INT32_C(-1960480477), INT32_C( 1264329759), INT32_C( 1663854158))
1036   };
1037 
1038   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1039     simde__m128i r = simde_mm_castpd_si128(simde_mm_castsi128_pd(test_vec[i]));
1040     simde_assert_m128i_equal(r, test_vec[i]);
1041   }
1042 
1043   return 0;
1044 }
1045 
1046 static int
test_simde_mm_castsi128_ps(SIMDE_MUNIT_TEST_ARGS)1047 test_simde_mm_castsi128_ps(SIMDE_MUNIT_TEST_ARGS) {
1048   simde__m128i test_vec[8] = {
1049     simde_mm_set_epi32(INT32_C(-1036963898), INT32_C( 1847069037), INT32_C(  740321504), INT32_C(  778754840)),
1050     simde_mm_set_epi32(INT32_C(  975137998), INT32_C( -252397546), INT32_C( 1504697866), INT32_C(-1327032545)),
1051     simde_mm_set_epi32(INT32_C(-1494981423), INT32_C( -175189577), INT32_C( 2056595322), INT32_C( 1080531273)),
1052     simde_mm_set_epi32(INT32_C(-1391843620), INT32_C(  424327107), INT32_C(  948927709), INT32_C( -666077781)),
1053     simde_mm_set_epi32(INT32_C(  951847201), INT32_C( -299846327), INT32_C(  575809604), INT32_C(-1150359231)),
1054     simde_mm_set_epi32(INT32_C(  837564377), INT32_C( -933128035), INT32_C( -581372672), INT32_C( -490866291)),
1055     simde_mm_set_epi32(INT32_C( -169157316), INT32_C( 1521943175), INT32_C(  841770394), INT32_C( -192049832)),
1056     simde_mm_set_epi32(INT32_C( -848324384), INT32_C(-1699878899), INT32_C( -332340467), INT32_C(  934012294))
1057   };
1058 
1059   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1060     simde__m128i r = simde_mm_castps_si128(simde_mm_castsi128_ps(test_vec[i]));
1061     simde_assert_m128i_i64(r, ==, test_vec[i]);
1062   }
1063 
1064   return 0;
1065 }
1066 
1067 static int
test_simde_mm_cmpeq_epi8(SIMDE_MUNIT_TEST_ARGS)1068 test_simde_mm_cmpeq_epi8(SIMDE_MUNIT_TEST_ARGS) {
1069   const struct {
1070     simde__m128i a;
1071     simde__m128i b;
1072     simde__m128i r;
1073   } test_vec[8] = {
1074     { simde_mm_set_epi8(INT8_C(  48), INT8_C( -17), INT8_C(  87), INT8_C(  -4),
1075                         INT8_C(   4), INT8_C( -44), INT8_C( 121), INT8_C(  68),
1076                         INT8_C(  49), INT8_C(-108), INT8_C(  49), INT8_C( -79),
1077                         INT8_C(  51), INT8_C( -82), INT8_C(  23), INT8_C( -58)),
1078       simde_mm_set_epi8(INT8_C(  87), INT8_C( -42), INT8_C(  33), INT8_C( 126),
1079                         INT8_C(  91), INT8_C( 115), INT8_C( -90), INT8_C(  48),
1080                         INT8_C( -49), INT8_C(-119), INT8_C(  23), INT8_C(  50),
1081                         INT8_C( -10), INT8_C( -15), INT8_C( -16), INT8_C( -58)),
1082       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1083                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1084                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1085                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1)) },
1086     { simde_mm_set_epi8(INT8_C(  90), INT8_C( -93), INT8_C( 121), INT8_C( 114),
1087                         INT8_C(  21), INT8_C( -52), INT8_C( -45), INT8_C( -83),
1088                         INT8_C(-123), INT8_C(-119), INT8_C( -53), INT8_C(-117),
1089                         INT8_C( -60), INT8_C( -20), INT8_C(-100), INT8_C(  26)),
1090       simde_mm_set_epi8(INT8_C(  67), INT8_C( -34), INT8_C(-110), INT8_C( -79),
1091                         INT8_C( -72), INT8_C( -43), INT8_C(  64), INT8_C( -74),
1092                         INT8_C(  64), INT8_C(  85), INT8_C( -71), INT8_C(  89),
1093                         INT8_C(  35), INT8_C(  81), INT8_C( 104), INT8_C( 111)),
1094       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1095                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1096                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1097                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0)) },
1098     { simde_mm_set_epi8(INT8_C(  33), INT8_C(  56), INT8_C( -20), INT8_C(  49),
1099                         INT8_C( -77), INT8_C(-123), INT8_C( -77), INT8_C(-109),
1100                         INT8_C( -13), INT8_C(  91), INT8_C( 105), INT8_C(  29),
1101                         INT8_C(  35), INT8_C( -62), INT8_C(  39), INT8_C( -24)),
1102       simde_mm_set_epi8(INT8_C( -80), INT8_C( -37), INT8_C(  43), INT8_C( 121),
1103                         INT8_C(-104), INT8_C( -93), INT8_C(-100), INT8_C(  55),
1104                         INT8_C( -82), INT8_C( -92), INT8_C(  -6), INT8_C(   2),
1105                         INT8_C( -33), INT8_C( 114), INT8_C( -94), INT8_C(  58)),
1106       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1107                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1108                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1109                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0)) },
1110     { simde_mm_set_epi8(INT8_C( -50), INT8_C(  20), INT8_C(  99), INT8_C(   4),
1111                         INT8_C(  66), INT8_C( -69), INT8_C(-117), INT8_C(  25),
1112                         INT8_C( -96), INT8_C( -11), INT8_C( -75), INT8_C( -79),
1113                         INT8_C(  88), INT8_C(-123), INT8_C( -55), INT8_C(  22)),
1114       simde_mm_set_epi8(INT8_C(  68), INT8_C(-117), INT8_C(-113), INT8_C(  30),
1115                         INT8_C(   0), INT8_C(  65), INT8_C( -61), INT8_C( -31),
1116                         INT8_C( -53), INT8_C(  -2), INT8_C( -47), INT8_C(  20),
1117                         INT8_C( -79), INT8_C(-126), INT8_C(  40), INT8_C(  81)),
1118       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1119                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1120                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1121                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0)) },
1122     { simde_mm_set_epi8(INT8_C( 100), INT8_C( -54), INT8_C( -62), INT8_C( -41),
1123                         INT8_C(-110), INT8_C(  -3), INT8_C(-102), INT8_C(  -2),
1124                         INT8_C(  26), INT8_C( -67), INT8_C( -67), INT8_C( -73),
1125                         INT8_C(  18), INT8_C( 123), INT8_C( 122), INT8_C( 106)),
1126       simde_mm_set_epi8(INT8_C(  -5), INT8_C(   2), INT8_C( 119), INT8_C(  28),
1127                         INT8_C( -24), INT8_C(  12), INT8_C( 106), INT8_C( -55),
1128                         INT8_C( 124), INT8_C(  69), INT8_C(  31), INT8_C(-126),
1129                         INT8_C( -80), INT8_C( -78), INT8_C( -93), INT8_C( -23)),
1130       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1131                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1132                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1133                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0)) },
1134     { simde_mm_set_epi8(INT8_C(  48), INT8_C( -25), INT8_C( -80), INT8_C(  17),
1135                         INT8_C( -70), INT8_C( -40), INT8_C(  83), INT8_C(  37),
1136                         INT8_C(  22), INT8_C( -91), INT8_C( -79), INT8_C(   8),
1137                         INT8_C(   9), INT8_C( -21), INT8_C( -51), INT8_C( -21)),
1138       simde_mm_set_epi8(INT8_C(  55), INT8_C( 114), INT8_C( -79), INT8_C( -59),
1139                         INT8_C(  15), INT8_C( -50), INT8_C( -69), INT8_C(   7),
1140                         INT8_C(-113), INT8_C( -95), INT8_C( 112), INT8_C(   5),
1141                         INT8_C( -30), INT8_C( -68), INT8_C( -27), INT8_C( -43)),
1142       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1143                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1144                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1145                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0)) },
1146     { simde_mm_set_epi8(INT8_C(  34), INT8_C( 111), INT8_C(  52), INT8_C(  93),
1147                         INT8_C( -12), INT8_C(  98), INT8_C( -88), INT8_C(  63),
1148                         INT8_C(  64), INT8_C( -98), INT8_C(  18), INT8_C(  40),
1149                         INT8_C( 119), INT8_C(  68), INT8_C( -90), INT8_C( -37)),
1150       simde_mm_set_epi8(INT8_C(  35), INT8_C(  97), INT8_C(   3), INT8_C(  88),
1151                         INT8_C( -70), INT8_C( -12), INT8_C( -13), INT8_C(  52),
1152                         INT8_C( 127), INT8_C(  -5), INT8_C( -24), INT8_C( -10),
1153                         INT8_C( -21), INT8_C(-112), INT8_C( -81), INT8_C(  86)),
1154       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1155                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1156                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1157                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0)) },
1158     { simde_mm_set_epi8(INT8_C( -25), INT8_C(-100), INT8_C( -66), INT8_C( 103),
1159                         INT8_C(-103), INT8_C( 116), INT8_C( -97), INT8_C( -43),
1160                         INT8_C( 123), INT8_C( -33), INT8_C( -71), INT8_C(-122),
1161                         INT8_C( 100), INT8_C( 116), INT8_C(  67), INT8_C(-119)),
1162       simde_mm_set_epi8(INT8_C( 103), INT8_C( -84), INT8_C( 102), INT8_C( -67),
1163                         INT8_C( -82), INT8_C(  14), INT8_C( -17), INT8_C( -71),
1164                         INT8_C( -31), INT8_C(-109), INT8_C( -84), INT8_C( -22),
1165                         INT8_C(  78), INT8_C(-120), INT8_C( -77), INT8_C(  -6)),
1166       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1167                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1168                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1169                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0)) }
1170   };
1171 
1172   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1173     simde__m128i r = simde_mm_cmpeq_epi8(test_vec[i].a, test_vec[i].b);
1174     simde_assert_m128i_i8(r, ==, test_vec[i].r);
1175   }
1176 
1177   return 0;
1178 }
1179 
1180 static int
test_simde_mm_cmpeq_epi16(SIMDE_MUNIT_TEST_ARGS)1181 test_simde_mm_cmpeq_epi16(SIMDE_MUNIT_TEST_ARGS) {
1182   const struct {
1183     simde__m128i a;
1184     simde__m128i b;
1185     simde__m128i r;
1186   } test_vec[8] = {
1187     { simde_mm_set_epi16(INT16_C(  5875), INT16_C(-30240), INT16_C(  4973), INT16_C(-24835),
1188                          INT16_C( -2682), INT16_C( 25733), INT16_C(  1837), INT16_C( -8035)),
1189       simde_mm_set_epi16(INT16_C(  5875), INT16_C(-30240), INT16_C(  9332), INT16_C(-24835),
1190                          INT16_C( -8998), INT16_C( 25733), INT16_C(  1837), INT16_C(-18483)),
1191       simde_mm_set_epi16(INT16_C(    -1), INT16_C(    -1), INT16_C(     0), INT16_C(    -1),
1192                          INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(     0)) },
1193     { simde_mm_set_epi16(INT16_C( -1458), INT16_C( 12290), INT16_C(   394), INT16_C(  6014),
1194                          INT16_C( 25725), INT16_C( 16049), INT16_C(  -659), INT16_C( 13250)),
1195       simde_mm_set_epi16(INT16_C( -1458), INT16_C( 12290), INT16_C(   394), INT16_C(  6014),
1196                          INT16_C( 25725), INT16_C(-30312), INT16_C(  -659), INT16_C( 20372)),
1197       simde_mm_set_epi16(INT16_C(    -1), INT16_C(    -1), INT16_C(    -1), INT16_C(    -1),
1198                          INT16_C(    -1), INT16_C(     0), INT16_C(    -1), INT16_C(     0)) },
1199     { simde_mm_set_epi16(INT16_C( -4375), INT16_C(  1648), INT16_C( -8256), INT16_C( 10030),
1200                          INT16_C( 20444), INT16_C( -7330), INT16_C( -7889), INT16_C( 23879)),
1201       simde_mm_set_epi16(INT16_C(  -644), INT16_C(  1648), INT16_C( -8256), INT16_C( 10030),
1202                          INT16_C(  4813), INT16_C( -7330), INT16_C(-10599), INT16_C(-13677)),
1203       simde_mm_set_epi16(INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(    -1),
1204                          INT16_C(     0), INT16_C(    -1), INT16_C(     0), INT16_C(     0)) },
1205     { simde_mm_set_epi16(INT16_C(-20962), INT16_C(-30592), INT16_C(-23740), INT16_C( -1360),
1206                          INT16_C(  6756), INT16_C( 10080), INT16_C( 31194), INT16_C(-10248)),
1207       simde_mm_set_epi16(INT16_C(-20962), INT16_C(-19403), INT16_C( 31222), INT16_C( 12369),
1208                          INT16_C( 10909), INT16_C( 10080), INT16_C( 31194), INT16_C(-10248)),
1209       simde_mm_set_epi16(INT16_C(    -1), INT16_C(     0), INT16_C(     0), INT16_C(     0),
1210                          INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(    -1)) },
1211     { simde_mm_set_epi16(INT16_C( -5540), INT16_C(-14756), INT16_C(-15994), INT16_C(  1795),
1212                          INT16_C( 18849), INT16_C( 15779), INT16_C(  5314), INT16_C(-13448)),
1213       simde_mm_set_epi16(INT16_C( -5540), INT16_C( 14083), INT16_C(-16603), INT16_C(  1795),
1214                          INT16_C( 28557), INT16_C(-32040), INT16_C(  5314), INT16_C( -4887)),
1215       simde_mm_set_epi16(INT16_C(    -1), INT16_C(     0), INT16_C(     0), INT16_C(    -1),
1216                          INT16_C(     0), INT16_C(     0), INT16_C(    -1), INT16_C(     0)) },
1217     { simde_mm_set_epi16(INT16_C(-18621), INT16_C(  6869), INT16_C(-16161), INT16_C(-24568),
1218                          INT16_C(-10576), INT16_C( 20065), INT16_C( -8241), INT16_C(-21658)),
1219       simde_mm_set_epi16(INT16_C(-18621), INT16_C(  6869), INT16_C(-10830), INT16_C(-24568),
1220                          INT16_C(-10576), INT16_C( 20065), INT16_C( -8094), INT16_C(-21658)),
1221       simde_mm_set_epi16(INT16_C(    -1), INT16_C(    -1), INT16_C(     0), INT16_C(    -1),
1222                          INT16_C(    -1), INT16_C(    -1), INT16_C(     0), INT16_C(    -1)) },
1223     { simde_mm_set_epi16(INT16_C(-20765), INT16_C( 27683), INT16_C( 13646), INT16_C( 26224),
1224                          INT16_C(-12316), INT16_C( -2556), INT16_C( -1320), INT16_C(-15938)),
1225       simde_mm_set_epi16(INT16_C( -5976), INT16_C( 27683), INT16_C( -6395), INT16_C( 26224),
1226                          INT16_C(-12316), INT16_C( -2556), INT16_C( -1320), INT16_C(-15143)),
1227       simde_mm_set_epi16(INT16_C(     0), INT16_C(    -1), INT16_C(     0), INT16_C(    -1),
1228                          INT16_C(    -1), INT16_C(    -1), INT16_C(    -1), INT16_C(     0)) },
1229     { simde_mm_set_epi16(INT16_C( 25864), INT16_C( 17430), INT16_C( 25473), INT16_C( 24392),
1230                          INT16_C( 27481), INT16_C(  2288), INT16_C( 24811), INT16_C( 18514)),
1231       simde_mm_set_epi16(INT16_C( 25864), INT16_C(  8829), INT16_C( 25473), INT16_C( 24392),
1232                          INT16_C( 27481), INT16_C(  4599), INT16_C( 24811), INT16_C( 18514)),
1233       simde_mm_set_epi16(INT16_C(    -1), INT16_C(     0), INT16_C(    -1), INT16_C(    -1),
1234                          INT16_C(    -1), INT16_C(     0), INT16_C(    -1), INT16_C(    -1)) }
1235   };
1236 
1237   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1238     simde__m128i r = simde_mm_cmpeq_epi16(test_vec[i].a, test_vec[i].b);
1239     simde_assert_m128i_i16(r, ==, test_vec[i].r);
1240   }
1241 
1242   return 0;
1243 }
1244 
1245 static int
test_simde_mm_cmpeq_epi32(SIMDE_MUNIT_TEST_ARGS)1246 test_simde_mm_cmpeq_epi32(SIMDE_MUNIT_TEST_ARGS) {
1247   const struct {
1248     simde__m128i a;
1249     simde__m128i b;
1250     simde__m128i r;
1251   } test_vec[8] = {
1252     { simde_mm_set_epi32(  385059296,   325951229,  -175741819,   120447133),
1253       simde_mm_set_epi32( -411118693,   325951229,  -175741819,   -74467379),
1254       simde_mm_set_epi32(          0,          -1,          -1,           0) },
1255     { simde_mm_set_epi32( 2086724423,  2071647391,   395772386,  -878201179),
1256       simde_mm_set_epi32( 2086724423,    25827198,  1685929649,   -43174974),
1257       simde_mm_set_epi32(         -1,           0,           0,           0) },
1258     { simde_mm_set_epi32(-1656549033,  -529471298,  -677159845, -1011499644),
1259       simde_mm_set_epi32(-1108138959,  2008596507,    36966751, -1011499644),
1260       simde_mm_set_epi32(          0,           0,           0,          -1) },
1261     { simde_mm_set_epi32(  -42154427,   232395060,   315449676,  -694564205),
1262       simde_mm_set_epi32(-1781616670,   232395060,   315449676,  -694564205),
1263       simde_mm_set_epi32(          0,          -1,          -1,          -1) },
1264     { simde_mm_set_epi32(-1373730688, -1555760464,   442771296,  2044385272),
1265       simde_mm_set_epi32( -819547083, -1555760464,   442771296,  2044385272),
1266       simde_mm_set_epi32(          0,          -1,          -1,          -1) },
1267     { simde_mm_set_epi32( -285007987,  1222927916,  -234086536,   711157928),
1268       simde_mm_set_epi32( -285007987,  1222927916,  1235303843,   711157928),
1269       simde_mm_set_epi32(         -1,          -1,           0,          -1) },
1270     { simde_mm_set_epi32( 1734698060,  -250509290,  -430142591,   970705024),
1271       simde_mm_set_epi32( 1734698060, -1399422252,  -430142591, -1199939349),
1272       simde_mm_set_epi32(         -1,           0,          -1,           0) },
1273     { simde_mm_set_epi32( 1285559999,  -709744735, -1852486552,  -530433851),
1274       simde_mm_set_epi32( 1285559999,  -709744735, -1768521466,  -530433851),
1275       simde_mm_set_epi32(         -1,          -1,           0,          -1) }
1276   };
1277 
1278   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1279     simde__m128i r = simde_mm_cmpeq_epi32(test_vec[i].a, test_vec[i].b);
1280     simde_assert_m128i_i32(r, ==, test_vec[i].r);
1281   }
1282 
1283   return 0;
1284 }
1285 
1286 static int
test_simde_mm_cmpeq_pd(SIMDE_MUNIT_TEST_ARGS)1287 test_simde_mm_cmpeq_pd(SIMDE_MUNIT_TEST_ARGS) {
1288   const struct {
1289     simde__m128d a;
1290     simde__m128d b;
1291     simde__m128i r;
1292   } test_vec[8] = {
1293    { simde_mm_set_pd(SIMDE_FLOAT64_C(  780.35), SIMDE_FLOAT64_C(  826.07)),
1294       simde_mm_set_pd(SIMDE_FLOAT64_C(  625.03), SIMDE_FLOAT64_C(  826.07)),
1295       simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C(-1), INT32_C(-1)) },
1296     { simde_mm_set_pd(SIMDE_FLOAT64_C( -334.66), SIMDE_FLOAT64_C(  476.36)),
1297       simde_mm_set_pd(SIMDE_FLOAT64_C( -334.66), SIMDE_FLOAT64_C(  556.75)),
1298       simde_mm_set_epi32(INT32_C(-1), INT32_C(-1), INT32_C( 0), INT32_C( 0)) },
1299     { simde_mm_set_pd(SIMDE_FLOAT64_C(  324.97), SIMDE_FLOAT64_C(  726.67)),
1300       simde_mm_set_pd(SIMDE_FLOAT64_C( -886.03), SIMDE_FLOAT64_C(  556.75)),
1301       simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
1302     { simde_mm_set_pd(SIMDE_FLOAT64_C(  432.42), SIMDE_FLOAT64_C(  208.20)),
1303       simde_mm_set_pd(SIMDE_FLOAT64_C( -886.03), SIMDE_FLOAT64_C(  556.75)),
1304       simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
1305     { simde_mm_set_pd(SIMDE_FLOAT64_C(  361.87), SIMDE_FLOAT64_C( -173.19)),
1306       simde_mm_set_pd(SIMDE_FLOAT64_C(  190.30), SIMDE_FLOAT64_C( -730.35)),
1307       simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
1308     { simde_mm_set_pd(SIMDE_FLOAT64_C(  187.80), SIMDE_FLOAT64_C( -428.45)),
1309       simde_mm_set_pd(SIMDE_FLOAT64_C(  754.99), SIMDE_FLOAT64_C( -730.35)),
1310       simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
1311     { simde_mm_set_pd(SIMDE_FLOAT64_C(   34.65), SIMDE_FLOAT64_C(  814.87)),
1312       simde_mm_set_pd(SIMDE_FLOAT64_C(  105.60), SIMDE_FLOAT64_C(  292.36)),
1313       simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) },
1314     { simde_mm_set_pd(SIMDE_FLOAT64_C(  221.70), SIMDE_FLOAT64_C( -277.53)),
1315       simde_mm_set_pd(SIMDE_FLOAT64_C( -578.28), SIMDE_FLOAT64_C(  292.36)),
1316       simde_mm_set_epi32(INT32_C( 0), INT32_C( 0), INT32_C( 0), INT32_C( 0)) }
1317   };
1318 
1319   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1320     simde__m128i r = simde_mm_castpd_si128(simde_mm_cmpeq_pd(test_vec[i].a, test_vec[i].b));
1321     simde_assert_m128i_i32(r, ==, test_vec[i].r);
1322   }
1323 
1324   return 0;
1325 }
1326 
1327 static int
test_simde_mm_cmpeq_sd(SIMDE_MUNIT_TEST_ARGS)1328 test_simde_mm_cmpeq_sd(SIMDE_MUNIT_TEST_ARGS) {
1329   simde__m128d all_unset = simde_mm_setzero_pd();
1330   simde__m128d all_set = simde_mm_cmpeq_pd(all_unset, all_unset);
1331 
1332   const struct {
1333     simde__m128d a;
1334     simde__m128d b;
1335     simde__m128d r;
1336   } test_vec[8] = {
1337     { simde_mm_set_pd(SIMDE_FLOAT64_C(  386.34), SIMDE_FLOAT64_C(  460.38)),
1338       simde_mm_set_pd(SIMDE_FLOAT64_C( -116.45), SIMDE_FLOAT64_C(  460.38)),
1339       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  386.34), SIMDE_FLOAT64_C(  460.38)), all_set) },
1340     { simde_mm_set_pd(SIMDE_FLOAT64_C( -493.11), SIMDE_FLOAT64_C(   58.42)),
1341       simde_mm_set_pd(SIMDE_FLOAT64_C(  511.42), SIMDE_FLOAT64_C(   58.42)),
1342       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -493.11), SIMDE_FLOAT64_C(   58.42)), all_set) },
1343     { simde_mm_set_pd(SIMDE_FLOAT64_C(  156.83), SIMDE_FLOAT64_C( -432.98)),
1344       simde_mm_set_pd(SIMDE_FLOAT64_C(  156.83), SIMDE_FLOAT64_C( -422.70)),
1345       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  156.83), SIMDE_FLOAT64_C( -432.98)), all_unset) },
1346     { simde_mm_set_pd(SIMDE_FLOAT64_C(  422.21), SIMDE_FLOAT64_C( -577.06)),
1347       simde_mm_set_pd(SIMDE_FLOAT64_C(  404.92), SIMDE_FLOAT64_C( -577.06)),
1348       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  422.21), SIMDE_FLOAT64_C( -577.06)), all_set) },
1349     { simde_mm_set_pd(SIMDE_FLOAT64_C( -468.33), SIMDE_FLOAT64_C(  -68.91)),
1350       simde_mm_set_pd(SIMDE_FLOAT64_C( -638.04), SIMDE_FLOAT64_C(  816.57)),
1351       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -468.33), SIMDE_FLOAT64_C(  -68.91)), all_unset) },
1352     { simde_mm_set_pd(SIMDE_FLOAT64_C(   37.53), SIMDE_FLOAT64_C(  339.53)),
1353       simde_mm_set_pd(SIMDE_FLOAT64_C(   37.53), SIMDE_FLOAT64_C(  339.53)),
1354       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(   37.53), SIMDE_FLOAT64_C(  339.53)), all_set) },
1355     { simde_mm_set_pd(SIMDE_FLOAT64_C( -872.05), SIMDE_FLOAT64_C( -696.39)),
1356       simde_mm_set_pd(SIMDE_FLOAT64_C( -872.05), SIMDE_FLOAT64_C( -696.39)),
1357       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -872.05), SIMDE_FLOAT64_C( -696.39)), all_set) },
1358     { simde_mm_set_pd(SIMDE_FLOAT64_C(  251.77), SIMDE_FLOAT64_C( -366.11)),
1359       simde_mm_set_pd(SIMDE_FLOAT64_C(  251.77), SIMDE_FLOAT64_C( -622.95)),
1360       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  251.77), SIMDE_FLOAT64_C( -366.11)), all_unset) }
1361   };
1362 
1363   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1364     simde__m128d r = simde_mm_cmpeq_sd(test_vec[i].a, test_vec[i].b);
1365     simde_assert_m128d_equal(r, test_vec[i].r);
1366   }
1367 
1368   return 0;
1369 }
1370 
1371 static int
test_simde_mm_cmpneq_pd(SIMDE_MUNIT_TEST_ARGS)1372 test_simde_mm_cmpneq_pd(SIMDE_MUNIT_TEST_ARGS) {
1373   const struct {
1374     simde__m128d a;
1375     simde__m128d b;
1376     simde__m128i r;
1377   } test_vec[8] = {
1378     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -17.42), SIMDE_FLOAT64_C( -471.42)),
1379       simde_mm_set_pd(SIMDE_FLOAT64_C( -120.90), SIMDE_FLOAT64_C( -471.42)),
1380       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1381     { simde_mm_set_pd(SIMDE_FLOAT64_C(  174.13), SIMDE_FLOAT64_C(  302.06)),
1382       simde_mm_set_pd(SIMDE_FLOAT64_C( -462.00), SIMDE_FLOAT64_C(  302.06)),
1383       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1384     { simde_mm_set_pd(SIMDE_FLOAT64_C(  456.61), SIMDE_FLOAT64_C(  -31.59)),
1385       simde_mm_set_pd(SIMDE_FLOAT64_C(  456.61), SIMDE_FLOAT64_C(  -31.59)),
1386       simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1387     { simde_mm_set_pd(SIMDE_FLOAT64_C(  863.75), SIMDE_FLOAT64_C(  743.68)),
1388       simde_mm_set_pd(SIMDE_FLOAT64_C(  863.75), SIMDE_FLOAT64_C( -940.38)),
1389       simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) },
1390     { simde_mm_set_pd(SIMDE_FLOAT64_C( -207.80), SIMDE_FLOAT64_C(  181.86)),
1391       simde_mm_set_pd(SIMDE_FLOAT64_C( -207.80), SIMDE_FLOAT64_C(  980.93)),
1392       simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) },
1393     { simde_mm_set_pd(SIMDE_FLOAT64_C( -330.41), SIMDE_FLOAT64_C(  936.80)),
1394       simde_mm_set_pd(SIMDE_FLOAT64_C( -223.97), SIMDE_FLOAT64_C(  936.80)),
1395       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1396     { simde_mm_set_pd(SIMDE_FLOAT64_C( -293.06), SIMDE_FLOAT64_C( -978.73)),
1397       simde_mm_set_pd(SIMDE_FLOAT64_C( -858.76), SIMDE_FLOAT64_C( -978.73)),
1398       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1399     { simde_mm_set_pd(SIMDE_FLOAT64_C(  215.10), SIMDE_FLOAT64_C( -720.29)),
1400       simde_mm_set_pd(SIMDE_FLOAT64_C( -813.22), SIMDE_FLOAT64_C(  235.59)),
1401       simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) }
1402   };
1403 
1404   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1405     simde__m128i r = simde_mm_castpd_si128(simde_mm_cmpneq_pd(test_vec[i].a, test_vec[i].b));
1406     simde_assert_m128i_i64(r, ==, test_vec[i].r);
1407   }
1408 
1409   return 0;
1410 }
1411 
1412 static int
test_simde_mm_cmpneq_sd(SIMDE_MUNIT_TEST_ARGS)1413 test_simde_mm_cmpneq_sd(SIMDE_MUNIT_TEST_ARGS) {
1414   const struct {
1415     simde__m128d a;
1416     simde__m128d b;
1417     simde__m128d r;
1418   } test_vec[8] = {
1419     { simde_mm_set_pd(SIMDE_FLOAT64_C(  489.91), SIMDE_FLOAT64_C(  496.15)),
1420       simde_mm_set_pd(SIMDE_FLOAT64_C(  -40.59), SIMDE_FLOAT64_C(  496.15)),
1421       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  489.91), SIMDE_FLOAT64_C(  496.15)), simde_mm_setzero_pd()) },
1422     { simde_mm_set_pd(SIMDE_FLOAT64_C( -981.34), SIMDE_FLOAT64_C(  944.87)),
1423       simde_mm_set_pd(SIMDE_FLOAT64_C( -433.21), SIMDE_FLOAT64_C(  882.20)),
1424       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -981.34), SIMDE_FLOAT64_C(  944.87)), simde_x_mm_setone_pd()) },
1425     { simde_mm_set_pd(SIMDE_FLOAT64_C(  983.93), SIMDE_FLOAT64_C(  764.39)),
1426       simde_mm_set_pd(SIMDE_FLOAT64_C(  621.75), SIMDE_FLOAT64_C(  764.39)),
1427       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  983.93), SIMDE_FLOAT64_C(  764.39)), simde_mm_setzero_pd()) },
1428     { simde_mm_set_pd(SIMDE_FLOAT64_C(  274.45), SIMDE_FLOAT64_C(  789.62)),
1429       simde_mm_set_pd(SIMDE_FLOAT64_C(  274.45), SIMDE_FLOAT64_C(  789.62)),
1430       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  274.45), SIMDE_FLOAT64_C(  789.62)), simde_mm_setzero_pd()) },
1431     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -35.27), SIMDE_FLOAT64_C(   92.02)),
1432       simde_mm_set_pd(SIMDE_FLOAT64_C(  -35.27), SIMDE_FLOAT64_C(   92.02)),
1433       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  -35.27), SIMDE_FLOAT64_C(   92.02)), simde_mm_setzero_pd()) },
1434     { simde_mm_set_pd(SIMDE_FLOAT64_C( -842.06), SIMDE_FLOAT64_C( -358.82)),
1435       simde_mm_set_pd(SIMDE_FLOAT64_C(  290.56), SIMDE_FLOAT64_C(  859.30)),
1436       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -842.06), SIMDE_FLOAT64_C( -358.82)), simde_x_mm_setone_pd()) },
1437     { simde_mm_set_pd(SIMDE_FLOAT64_C( -575.93), SIMDE_FLOAT64_C( -661.58)),
1438       simde_mm_set_pd(SIMDE_FLOAT64_C( -462.75), SIMDE_FLOAT64_C(  732.75)),
1439       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -575.93), SIMDE_FLOAT64_C( -661.58)), simde_x_mm_setone_pd()) },
1440     { simde_mm_set_pd(SIMDE_FLOAT64_C( -876.76), SIMDE_FLOAT64_C( -235.41)),
1441       simde_mm_set_pd(SIMDE_FLOAT64_C(  264.94), SIMDE_FLOAT64_C(  767.34)),
1442       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -876.76), SIMDE_FLOAT64_C( -235.41)), simde_x_mm_setone_pd()) }
1443   };
1444 
1445   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1446     simde__m128d r = simde_mm_cmpneq_sd(test_vec[i].a, test_vec[i].b);
1447     simde_assert_m128d_equal(r, test_vec[i].r);
1448   }
1449 
1450   return 0;
1451 }
1452 
1453 static int
test_simde_mm_cmplt_epi8(SIMDE_MUNIT_TEST_ARGS)1454 test_simde_mm_cmplt_epi8(SIMDE_MUNIT_TEST_ARGS) {
1455   const struct {
1456     simde__m128i a;
1457     simde__m128i b;
1458     simde__m128i r;
1459   } test_vec[8] = {
1460     { simde_mm_set_epi8(INT8_C( -98), INT8_C( 126), INT8_C( -78), INT8_C( -97),
1461                         INT8_C( -35), INT8_C( -49), INT8_C( -62), INT8_C(  -8),
1462                         INT8_C( -88), INT8_C(  71), INT8_C(  16), INT8_C(  -4),
1463                         INT8_C(  69), INT8_C( -61), INT8_C(  47), INT8_C(  84)),
1464       simde_mm_set_epi8(INT8_C(   5), INT8_C(-114), INT8_C( -27), INT8_C( -61),
1465                         INT8_C(  56), INT8_C( 115), INT8_C( -53), INT8_C(  16),
1466                         INT8_C( -80), INT8_C( -18), INT8_C(  83), INT8_C(  -9),
1467                         INT8_C(  -3), INT8_C(  36), INT8_C( -57), INT8_C(  89)),
1468       simde_mm_set_epi8(INT8_C(  -1), INT8_C(   0), INT8_C(  -1), INT8_C(  -1),
1469                         INT8_C(  -1), INT8_C(  -1), INT8_C(  -1), INT8_C(  -1),
1470                         INT8_C(  -1), INT8_C(   0), INT8_C(  -1), INT8_C(   0),
1471                         INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(  -1)) },
1472     { simde_mm_set_epi8(INT8_C(  94), INT8_C(  78), INT8_C( -26), INT8_C(-126),
1473                         INT8_C( -98), INT8_C(  65), INT8_C(  38), INT8_C( -71),
1474                         INT8_C( -54), INT8_C(  20), INT8_C( -52), INT8_C(  55),
1475                         INT8_C( -76), INT8_C(  37), INT8_C( -95), INT8_C(  91)),
1476       simde_mm_set_epi8(INT8_C( -10), INT8_C( -43), INT8_C(  70), INT8_C(  -4),
1477                         INT8_C( -89), INT8_C( -31), INT8_C( -61), INT8_C(  81),
1478                         INT8_C(  64), INT8_C( -78), INT8_C(  14), INT8_C( 125),
1479                         INT8_C(  81), INT8_C(  62), INT8_C(-124), INT8_C(  39)),
1480       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(  -1),
1481                         INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(  -1),
1482                         INT8_C(  -1), INT8_C(   0), INT8_C(  -1), INT8_C(  -1),
1483                         INT8_C(  -1), INT8_C(  -1), INT8_C(   0), INT8_C(   0)) },
1484     { simde_mm_set_epi8(INT8_C( 103), INT8_C(  49), INT8_C(  24), INT8_C(   0),
1485                         INT8_C( -50), INT8_C(  64), INT8_C(  11), INT8_C( 101),
1486                         INT8_C(  39), INT8_C(  41), INT8_C(-111), INT8_C( -32),
1487                         INT8_C(  91), INT8_C(  86), INT8_C(-117), INT8_C( 115)),
1488       simde_mm_set_epi8(INT8_C( -44), INT8_C(  47), INT8_C( -14), INT8_C( 109),
1489                         INT8_C(  44), INT8_C(  97), INT8_C( -41), INT8_C(  53),
1490                         INT8_C(-121), INT8_C( -57), INT8_C(  54), INT8_C( 124),
1491                         INT8_C(  50), INT8_C( -73), INT8_C( -30), INT8_C( -62)),
1492       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1),
1493                         INT8_C(  -1), INT8_C(  -1), INT8_C(   0), INT8_C(   0),
1494                         INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(  -1),
1495                         INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0)) },
1496     { simde_mm_set_epi8(INT8_C(-110), INT8_C(   5), INT8_C(  41), INT8_C(  -3),
1497                         INT8_C(-114), INT8_C(  14), INT8_C(-117), INT8_C( -89),
1498                         INT8_C(  52), INT8_C(  62), INT8_C(  41), INT8_C( -25),
1499                         INT8_C( 114), INT8_C(  56), INT8_C(  58), INT8_C( -99)),
1500       simde_mm_set_epi8(INT8_C( -31), INT8_C( -36), INT8_C(-126), INT8_C( -69),
1501                         INT8_C( 113), INT8_C( -30), INT8_C( -24), INT8_C(  69),
1502                         INT8_C( -15), INT8_C(-110), INT8_C(  23), INT8_C(  87),
1503                         INT8_C(-127), INT8_C( -64), INT8_C( -38), INT8_C( -83)),
1504       simde_mm_set_epi8(INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1505                         INT8_C(  -1), INT8_C(   0), INT8_C(  -1), INT8_C(  -1),
1506                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1),
1507                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1)) },
1508     { simde_mm_set_epi8(INT8_C( -38), INT8_C( -67), INT8_C( -79), INT8_C( -41),
1509                         INT8_C(-114), INT8_C(  37), INT8_C( -71), INT8_C(  11),
1510                         INT8_C( 105), INT8_C( 102), INT8_C(  48), INT8_C( 127),
1511                         INT8_C(  84), INT8_C( 115), INT8_C(-102), INT8_C( -24)),
1512       simde_mm_set_epi8(INT8_C(  94), INT8_C( -20), INT8_C( -97), INT8_C(  -2),
1513                         INT8_C(-113), INT8_C(  46), INT8_C( 123), INT8_C(  -9),
1514                         INT8_C(  35), INT8_C( -47), INT8_C(  90), INT8_C( -73),
1515                         INT8_C(-122), INT8_C(  -3), INT8_C(-116), INT8_C(  -4)),
1516       simde_mm_set_epi8(INT8_C(  -1), INT8_C(  -1), INT8_C(   0), INT8_C(  -1),
1517                         INT8_C(  -1), INT8_C(  -1), INT8_C(  -1), INT8_C(   0),
1518                         INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0),
1519                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1)) },
1520     { simde_mm_set_epi8(INT8_C(  27), INT8_C(  41), INT8_C(  36), INT8_C( -97),
1521                         INT8_C( -84), INT8_C( 108), INT8_C( -37), INT8_C( -69),
1522                         INT8_C( -29), INT8_C(  45), INT8_C( 101), INT8_C( 104),
1523                         INT8_C( 102), INT8_C( -85), INT8_C(   3), INT8_C( 124)),
1524       simde_mm_set_epi8(INT8_C(-119), INT8_C(  16), INT8_C( -80), INT8_C(  97),
1525                         INT8_C(  97), INT8_C( -44), INT8_C(  71), INT8_C( -43),
1526                         INT8_C(  39), INT8_C( -54), INT8_C(  15), INT8_C( -61),
1527                         INT8_C( 100), INT8_C( -92), INT8_C(   5), INT8_C( -93)),
1528       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1),
1529                         INT8_C(  -1), INT8_C(   0), INT8_C(  -1), INT8_C(  -1),
1530                         INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1531                         INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0)) },
1532     { simde_mm_set_epi8(INT8_C(  53), INT8_C(  82), INT8_C(  -6), INT8_C(  99),
1533                         INT8_C(  95), INT8_C( -34), INT8_C( -90), INT8_C( -14),
1534                         INT8_C( -43), INT8_C( -72), INT8_C( -83), INT8_C(-104),
1535                         INT8_C(  -1), INT8_C( -60), INT8_C( 103), INT8_C( -66)),
1536       simde_mm_set_epi8(INT8_C(  13), INT8_C( 118), INT8_C(  25), INT8_C(  60),
1537                         INT8_C( -83), INT8_C( -43), INT8_C(  90), INT8_C(  54),
1538                         INT8_C( -84), INT8_C(-125), INT8_C( -41), INT8_C(  52),
1539                         INT8_C(  18), INT8_C(  46), INT8_C( 126), INT8_C( -65)),
1540       simde_mm_set_epi8(INT8_C(   0), INT8_C(  -1), INT8_C(  -1), INT8_C(   0),
1541                         INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(  -1),
1542                         INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(  -1),
1543                         INT8_C(  -1), INT8_C(  -1), INT8_C(  -1), INT8_C(  -1)) },
1544     { simde_mm_set_epi8(INT8_C(  43), INT8_C( 123), INT8_C( -13), INT8_C(  35),
1545                         INT8_C(-119), INT8_C(  53), INT8_C( -35), INT8_C( -46),
1546                         INT8_C(  44), INT8_C(  69), INT8_C(  50), INT8_C(-120),
1547                         INT8_C(   2), INT8_C(  50), INT8_C( -95), INT8_C(  46)),
1548       simde_mm_set_epi8(INT8_C( -57), INT8_C( -76), INT8_C(-104), INT8_C(-127),
1549                         INT8_C( -27), INT8_C( 127), INT8_C( 127), INT8_C(-109),
1550                         INT8_C(  40), INT8_C( -63), INT8_C(  87), INT8_C( -27),
1551                         INT8_C(  -1), INT8_C(-101), INT8_C(  11), INT8_C(  44)),
1552       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
1553                         INT8_C(  -1), INT8_C(  -1), INT8_C(  -1), INT8_C(   0),
1554                         INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(  -1),
1555                         INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0)) }
1556   };
1557 
1558   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
1559     simde__m128i r = simde_mm_cmplt_epi8(test_vec[i].a, test_vec[i].b);
1560     simde_assert_m128i_i8(r, ==, test_vec[i].r);
1561   }
1562 
1563   return 0;
1564 }
1565 
1566 static int
test_simde_mm_cmplt_epi16(SIMDE_MUNIT_TEST_ARGS)1567 test_simde_mm_cmplt_epi16(SIMDE_MUNIT_TEST_ARGS) {
1568   const struct {
1569     simde__m128i a;
1570     simde__m128i b;
1571     simde__m128i r;
1572   } test_vec[8] = {
1573     { simde_mm_set_epi16(INT16_C(-19152), INT16_C(  7219), INT16_C(  8875), INT16_C(-12109),
1574                          INT16_C( -6164), INT16_C(-29571), INT16_C( 29544), INT16_C( 12828)),
1575       simde_mm_set_epi16(INT16_C(-19152), INT16_C( -1176), INT16_C(-32721), INT16_C( 28268),
1576                          INT16_C( 28536), INT16_C(-24890), INT16_C(-20501), INT16_C( 12828)),
1577       simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(    -1),
1578                          INT16_C(    -1), INT16_C(    -1), INT16_C(     0), INT16_C(     0)) },
1579     { simde_mm_set_epi16(INT16_C(  -385), INT16_C( 10411), INT16_C( -4671), INT16_C( 18534),
1580                          INT16_C( 18234), INT16_C(  8064), INT16_C(-32746), INT16_C(  1460)),
1581       simde_mm_set_epi16(INT16_C(-11261), INT16_C( 19475), INT16_C( -4671), INT16_C(-23700),
1582                          INT16_C(  8656), INT16_C(  8064), INT16_C(-28801), INT16_C(  5582)),
1583       simde_mm_set_epi16(INT16_C(     0), INT16_C(    -1), INT16_C(     0), INT16_C(     0),
1584                          INT16_C(     0), INT16_C(     0), INT16_C(    -1), INT16_C(    -1)) },
1585     { simde_mm_set_epi16(INT16_C( 23112), INT16_C( 21760), INT16_C(-29652), INT16_C( -7707),
1586                          INT16_C(  4438), INT16_C(-14112), INT16_C(   617), INT16_C(-29125)),
1587       simde_mm_set_epi16(INT16_C(-20847), INT16_C(-17750), INT16_C(  7413), INT16_C( 13270),
1588                          INT16_C( 30220), INT16_C(-14112), INT16_C(  -140), INT16_C( 23495)),
1589       simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(    -1), INT16_C(    -1),
1590                          INT16_C(    -1), INT16_C(     0), INT16_C(     0), INT16_C(    -1)) },
1591     { simde_mm_set_epi16(INT16_C( 12262), INT16_C(-26458), INT16_C(-17793), INT16_C( 15097),
1592                          INT16_C(-28884), INT16_C(   -39), INT16_C( 29206), INT16_C( 24614)),
1593       simde_mm_set_epi16(INT16_C(-12392), INT16_C(-30769), INT16_C(-17793), INT16_C( 15097),
1594                          INT16_C( 22525), INT16_C(  7510), INT16_C( 28529), INT16_C( -9470)),
1595       simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0),
1596                          INT16_C(    -1), INT16_C(    -1), INT16_C(     0), INT16_C(     0)) },
1597     { simde_mm_set_epi16(INT16_C( 14581), INT16_C( 30465), INT16_C( 26611), INT16_C(-25355),
1598                          INT16_C( 12222), INT16_C(-12322), INT16_C(   176), INT16_C( -4760)),
1599       simde_mm_set_epi16(INT16_C( 10242), INT16_C( 15750), INT16_C(-11513), INT16_C(  7111),
1600                          INT16_C(-29171), INT16_C(-12322), INT16_C(   176), INT16_C( -4760)),
1601       simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(    -1),
1602                          INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0)) },
1603     { simde_mm_set_epi16(INT16_C(-12610), INT16_C( 30415), INT16_C(-22748), INT16_C(  8564),
1604                          INT16_C(-28400), INT16_C(-22984), INT16_C(-31130), INT16_C(  2400)),
1605       simde_mm_set_epi16(INT16_C( 17489), INT16_C(-18807), INT16_C( 19401), INT16_C(   -73),
1606                          INT16_C(-28400), INT16_C( -7356), INT16_C( 31412), INT16_C(  2400)),
1607       simde_mm_set_epi16(INT16_C(    -1), INT16_C(     0), INT16_C(    -1), INT16_C(     0),
1608                          INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(     0)) },
1609     { simde_mm_set_epi16(INT16_C(  5617), INT16_C( -8984), INT16_C( 20729), INT16_C( 15025),
1610                          INT16_C(-12038), INT16_C(-32017), INT16_C(-24693), INT16_C( -3874)),
1611       simde_mm_set_epi16(INT16_C( -8219), INT16_C( 19022), INT16_C(-28515), INT16_C( 15025),
1612                          INT16_C( -2982), INT16_C(  -314), INT16_C( 16536), INT16_C(-17813)),
1613       simde_mm_set_epi16(INT16_C(     0), INT16_C(    -1), INT16_C(     0), INT16_C(     0),
1614                          INT16_C(    -1), INT16_C(    -1), INT16_C(    -1), INT16_C(     0)) },
1615     { simde_mm_set_epi16(INT16_C(  3028), INT16_C( 25056), INT16_C(-30420), INT16_C(  3400),
1616                          INT16_C( 27498), INT16_C(-24168), INT16_C(-10264), INT16_C( -5651)),
1617       simde_mm_set_epi16(INT16_C( 16763), INT16_C(  3971), INT16_C(-30420), INT16_C(-13950),
1618                          INT16_C( 26793), INT16_C(-27284), INT16_C( 22512), INT16_C(-19434)),
1619       simde_mm_set_epi16(INT16_C(    -1), INT16_C(     0), INT16_C(     0), INT16_C(     0),
1620                          INT16_C(     0), INT16_C(     0), INT16_C(    -1), INT16_C(     0)) }
1621   };
1622 
1623   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1624     simde__m128i r = simde_mm_cmplt_epi16(test_vec[i].a, test_vec[i].b);
1625     simde_assert_m128i_i16(r, ==, test_vec[i].r);
1626   }
1627 
1628   return 0;
1629 }
1630 
1631 static int
test_simde_mm_cmplt_epi32(SIMDE_MUNIT_TEST_ARGS)1632 test_simde_mm_cmplt_epi32(SIMDE_MUNIT_TEST_ARGS) {
1633   const struct {
1634     simde__m128i a;
1635     simde__m128i b;
1636     simde__m128i r;
1637   } test_vec[8] = {
1638     { simde_mm_set_epi32(-1255138253,   581685427,  -403927939,  1936208412),
1639       simde_mm_set_epi32(  212007784, -2144375188,  1870175942,  1936208412),
1640       simde_mm_set_epi32(         -1,           0,          -1,           0) },
1641     { simde_mm_set_epi32(-1412605706,  -573136614,  -789373589,  1859272017),
1642       simde_mm_set_epi32(-1412605706,  -306100122,  1194991488, -2146040396),
1643       simde_mm_set_epi32(          0,          -1,          -1,           0) },
1644     { simde_mm_set_epi32(-1857828629,  -865462431,  1845130162,  -790702535),
1645       simde_mm_set_epi32( 1020632409,  -786544507,   219144900,   222814568),
1646       simde_mm_set_epi32(         -1,          -1,           0,          -1) },
1647     { simde_mm_set_epi32(-1366181206,   485831638,  1980524634,    -9151545),
1648       simde_mm_set_epi32(-1932199485,   327347510,   706051828,  -541415230),
1649       simde_mm_set_epi32(          0,           0,           0,           0) },
1650     { simde_mm_set_epi32(  803641510, -1166066951, -1892876327,  1914069030),
1651       simde_mm_set_epi32( -812087345, -1002684270,  1476205910,  1869732610),
1652       simde_mm_set_epi32(          0,          -1,          -1,           0) },
1653     { simde_mm_set_epi32(-1773657387, -1529382252,  1397468980,  1171964570),
1654       simde_mm_set_epi32(  955610881,  1744018677,   801034206,  1171964570),
1655       simde_mm_set_epi32(         -1,          -1,           0,           0) },
1656     { simde_mm_set_epi32(-1807229965, -1210178631,  1522043695, -1735369601),
1657       simde_mm_set_epi32(-1560329504,  1101415557,  1311721597,  1371106332),
1658       simde_mm_set_epi32(         -1,          -1,           0,          -1) },
1659     { simde_mm_set_epi32( 1146205833,  1271529399,  1661264708,  2058651784),
1660       simde_mm_set_epi32(  624079870,  1320739553, -1066082248, -1119644266),
1661       simde_mm_set_epi32(          0,          -1,           0,           0) }
1662   };
1663 
1664   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1665     simde__m128i r = simde_mm_cmplt_epi32(test_vec[i].a, test_vec[i].b);
1666     simde_assert_m128i_i32(r, ==, test_vec[i].r);
1667   }
1668 
1669   return 0;
1670 }
1671 
1672 static int
test_simde_mm_cmplt_pd(SIMDE_MUNIT_TEST_ARGS)1673 test_simde_mm_cmplt_pd(SIMDE_MUNIT_TEST_ARGS) {
1674   const struct {
1675     simde__m128d a;
1676     simde__m128d b;
1677     simde__m128i r;
1678   } test_vec[8] = {
1679     { simde_mm_set_pd(SIMDE_FLOAT64_C(  415.53), SIMDE_FLOAT64_C(  -98.38)),
1680       simde_mm_set_pd(SIMDE_FLOAT64_C(  415.53), SIMDE_FLOAT64_C( -729.13)),
1681       simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1682     { simde_mm_set_pd(SIMDE_FLOAT64_C( -102.02), SIMDE_FLOAT64_C( -129.13)),
1683       simde_mm_set_pd(SIMDE_FLOAT64_C(  345.59), SIMDE_FLOAT64_C( -901.28)),
1684       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1685     { simde_mm_set_pd(SIMDE_FLOAT64_C(  733.11), SIMDE_FLOAT64_C(  268.99)),
1686       simde_mm_set_pd(SIMDE_FLOAT64_C(  733.11), SIMDE_FLOAT64_C(  632.42)),
1687       simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) },
1688     { simde_mm_set_pd(SIMDE_FLOAT64_C(  988.26), SIMDE_FLOAT64_C(    0.67)),
1689       simde_mm_set_pd(SIMDE_FLOAT64_C( -735.83), SIMDE_FLOAT64_C(  857.46)),
1690       simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) },
1691     { simde_mm_set_pd(SIMDE_FLOAT64_C(  631.80), SIMDE_FLOAT64_C(  -84.12)),
1692       simde_mm_set_pd(SIMDE_FLOAT64_C(  596.99), SIMDE_FLOAT64_C(  -84.12)),
1693       simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1694     { simde_mm_set_pd(SIMDE_FLOAT64_C(  633.74), SIMDE_FLOAT64_C(  134.88)),
1695       simde_mm_set_pd(SIMDE_FLOAT64_C( -981.15), SIMDE_FLOAT64_C( -897.95)),
1696       simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1697     { simde_mm_set_pd(SIMDE_FLOAT64_C(  995.74), SIMDE_FLOAT64_C( -864.54)),
1698       simde_mm_set_pd(SIMDE_FLOAT64_C( -773.77), SIMDE_FLOAT64_C( -294.67)),
1699       simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) },
1700     { simde_mm_set_pd(SIMDE_FLOAT64_C( -847.57), SIMDE_FLOAT64_C(  363.82)),
1701       simde_mm_set_pd(SIMDE_FLOAT64_C(  743.31), SIMDE_FLOAT64_C( -671.22)),
1702       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) }
1703   };
1704 
1705   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1706     simde__m128i r = simde_mm_castpd_si128(simde_mm_cmplt_pd(test_vec[i].a, test_vec[i].b));
1707     simde_assert_m128i_i64(r, ==, test_vec[i].r);
1708   }
1709 
1710   return 0;
1711 }
1712 
1713 static int
test_simde_mm_cmplt_sd(SIMDE_MUNIT_TEST_ARGS)1714 test_simde_mm_cmplt_sd(SIMDE_MUNIT_TEST_ARGS) {
1715   const struct {
1716     simde__m128d a;
1717     simde__m128d b;
1718     simde__m128d r;
1719   } test_vec[8] = {
1720     { simde_mm_set_pd(SIMDE_FLOAT64_C(  884.89), SIMDE_FLOAT64_C( -700.86)),
1721       simde_mm_set_pd(SIMDE_FLOAT64_C(  194.09), SIMDE_FLOAT64_C(  342.08)),
1722       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  884.89), SIMDE_FLOAT64_C( -700.86)), simde_x_mm_setone_pd()) },
1723     { simde_mm_set_pd(SIMDE_FLOAT64_C( -552.29), SIMDE_FLOAT64_C( -477.43)),
1724       simde_mm_set_pd(SIMDE_FLOAT64_C( -288.53), SIMDE_FLOAT64_C( -439.96)),
1725       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -552.29), SIMDE_FLOAT64_C( -477.43)), simde_x_mm_setone_pd()) },
1726     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -25.82), SIMDE_FLOAT64_C( -940.19)),
1727       simde_mm_set_pd(SIMDE_FLOAT64_C(  251.57), SIMDE_FLOAT64_C(  618.81)),
1728       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  -25.82), SIMDE_FLOAT64_C( -940.19)), simde_x_mm_setone_pd()) },
1729     { simde_mm_set_pd(SIMDE_FLOAT64_C( -905.83), SIMDE_FLOAT64_C(  120.16)),
1730       simde_mm_set_pd(SIMDE_FLOAT64_C( -235.64), SIMDE_FLOAT64_C( -293.77)),
1731       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -905.83), SIMDE_FLOAT64_C(  120.16)), simde_mm_setzero_pd()) },
1732     { simde_mm_set_pd(SIMDE_FLOAT64_C(  804.90), SIMDE_FLOAT64_C(  266.33)),
1733       simde_mm_set_pd(SIMDE_FLOAT64_C( -104.58), SIMDE_FLOAT64_C( -965.81)),
1734       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  804.90), SIMDE_FLOAT64_C(  266.33)), simde_mm_setzero_pd()) },
1735     { simde_mm_set_pd(SIMDE_FLOAT64_C(   44.09), SIMDE_FLOAT64_C( -365.90)),
1736       simde_mm_set_pd(SIMDE_FLOAT64_C(  534.45), SIMDE_FLOAT64_C( -718.87)),
1737       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(   44.09), SIMDE_FLOAT64_C( -365.90)), simde_mm_setzero_pd()) },
1738     { simde_mm_set_pd(SIMDE_FLOAT64_C( -108.03), SIMDE_FLOAT64_C(  233.20)),
1739       simde_mm_set_pd(SIMDE_FLOAT64_C( -420.51), SIMDE_FLOAT64_C( -879.83)),
1740       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -108.03), SIMDE_FLOAT64_C(  233.20)), simde_mm_setzero_pd()) },
1741     { simde_mm_set_pd(SIMDE_FLOAT64_C( -399.23), SIMDE_FLOAT64_C(  758.04)),
1742       simde_mm_set_pd(SIMDE_FLOAT64_C( -334.35), SIMDE_FLOAT64_C( -250.33)),
1743       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -399.23), SIMDE_FLOAT64_C(  758.04)), simde_mm_setzero_pd()) }
1744   };
1745 
1746   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1747     simde__m128d r = simde_mm_cmplt_sd(test_vec[i].a, test_vec[i].b);
1748     simde_assert_m128d_equal(r, test_vec[i].r);
1749   }
1750 
1751   return 0;
1752 }
1753 
1754 static int
test_simde_mm_cmpnlt_pd(SIMDE_MUNIT_TEST_ARGS)1755 test_simde_mm_cmpnlt_pd(SIMDE_MUNIT_TEST_ARGS) {
1756   const struct {
1757     simde__m128d a;
1758     simde__m128d b;
1759     simde__m128i r;
1760   } test_vec[8] = {
1761     { simde_mm_set_pd(SIMDE_FLOAT64_C(  852.01), SIMDE_FLOAT64_C( -875.21)),
1762       simde_mm_set_pd(SIMDE_FLOAT64_C(  852.01), SIMDE_FLOAT64_C( -124.49)),
1763       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1764     { simde_mm_set_pd(SIMDE_FLOAT64_C(  396.43), SIMDE_FLOAT64_C( -754.13)),
1765       simde_mm_set_pd(SIMDE_FLOAT64_C(  396.43), SIMDE_FLOAT64_C( -446.22)),
1766       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1767     { simde_mm_set_pd(SIMDE_FLOAT64_C(  253.13), SIMDE_FLOAT64_C(  198.68)),
1768       simde_mm_set_pd(SIMDE_FLOAT64_C(  253.13), SIMDE_FLOAT64_C(  828.60)),
1769       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1770     { simde_mm_set_pd(SIMDE_FLOAT64_C(  137.84), SIMDE_FLOAT64_C( -995.54)),
1771       simde_mm_set_pd(SIMDE_FLOAT64_C(  137.84), SIMDE_FLOAT64_C( -366.89)),
1772       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1773     { simde_mm_set_pd(SIMDE_FLOAT64_C(  981.94), SIMDE_FLOAT64_C( -371.83)),
1774       simde_mm_set_pd(SIMDE_FLOAT64_C( -999.24), SIMDE_FLOAT64_C(  567.77)),
1775       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1776     { simde_mm_set_pd(SIMDE_FLOAT64_C(  471.23), SIMDE_FLOAT64_C( -984.85)),
1777       simde_mm_set_pd(SIMDE_FLOAT64_C( -365.65), SIMDE_FLOAT64_C(  102.67)),
1778       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1779     { simde_mm_set_pd(SIMDE_FLOAT64_C( -625.86), SIMDE_FLOAT64_C(  -91.22)),
1780       simde_mm_set_pd(SIMDE_FLOAT64_C( -928.96), SIMDE_FLOAT64_C( -311.29)),
1781       simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) },
1782     { simde_mm_set_pd(SIMDE_FLOAT64_C( -444.22), SIMDE_FLOAT64_C(  458.27)),
1783       simde_mm_set_pd(SIMDE_FLOAT64_C(  882.56), SIMDE_FLOAT64_C(  290.13)),
1784       simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) }
1785   };
1786 
1787   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1788     simde__m128i r = simde_mm_castpd_si128(simde_mm_cmpnlt_pd(test_vec[i].a, test_vec[i].b));
1789     simde_assert_m128i_i64(r, ==, test_vec[i].r);
1790   }
1791 
1792   return 0;
1793 }
1794 
1795 static int
test_simde_mm_cmpnlt_sd(SIMDE_MUNIT_TEST_ARGS)1796 test_simde_mm_cmpnlt_sd(SIMDE_MUNIT_TEST_ARGS) {
1797   const struct {
1798     simde__m128d a;
1799     simde__m128d b;
1800     simde__m128d r;
1801   } test_vec[8] = {
1802     { simde_mm_set_pd(SIMDE_FLOAT64_C( -713.31), SIMDE_FLOAT64_C( -162.56)),
1803       simde_mm_set_pd(SIMDE_FLOAT64_C( -134.78), SIMDE_FLOAT64_C( -333.93)),
1804       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -713.31), SIMDE_FLOAT64_C( -162.56)), simde_x_mm_setone_pd()) },
1805     { simde_mm_set_pd(SIMDE_FLOAT64_C(  903.93), SIMDE_FLOAT64_C(  249.58)),
1806       simde_mm_set_pd(SIMDE_FLOAT64_C(  300.72), SIMDE_FLOAT64_C( -642.46)),
1807       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  903.93), SIMDE_FLOAT64_C(  249.58)), simde_x_mm_setone_pd()) },
1808     { simde_mm_set_pd(SIMDE_FLOAT64_C( -697.34), SIMDE_FLOAT64_C(   79.67)),
1809       simde_mm_set_pd(SIMDE_FLOAT64_C( -123.52), SIMDE_FLOAT64_C( -418.48)),
1810       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -697.34), SIMDE_FLOAT64_C(   79.67)), simde_x_mm_setone_pd()) },
1811     { simde_mm_set_pd(SIMDE_FLOAT64_C(   90.71), SIMDE_FLOAT64_C( -449.42)),
1812       simde_mm_set_pd(SIMDE_FLOAT64_C(  629.69), SIMDE_FLOAT64_C(  449.98)),
1813       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(   90.71), SIMDE_FLOAT64_C( -449.42)), simde_mm_setzero_pd()) },
1814     { simde_mm_set_pd(SIMDE_FLOAT64_C(  751.69), SIMDE_FLOAT64_C( -170.45)),
1815       simde_mm_set_pd(SIMDE_FLOAT64_C( -991.25), SIMDE_FLOAT64_C(  129.62)),
1816       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  751.69), SIMDE_FLOAT64_C( -170.45)), simde_mm_setzero_pd()) },
1817     { simde_mm_set_pd(SIMDE_FLOAT64_C(  961.53), SIMDE_FLOAT64_C( -601.03)),
1818       simde_mm_set_pd(SIMDE_FLOAT64_C( -458.00), SIMDE_FLOAT64_C( -521.61)),
1819       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  961.53), SIMDE_FLOAT64_C( -601.03)), simde_mm_setzero_pd()) },
1820     { simde_mm_set_pd(SIMDE_FLOAT64_C( -697.78), SIMDE_FLOAT64_C(  908.22)),
1821       simde_mm_set_pd(SIMDE_FLOAT64_C( -418.87), SIMDE_FLOAT64_C(  253.38)),
1822       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -697.78), SIMDE_FLOAT64_C(  908.22)), simde_x_mm_setone_pd()) },
1823     { simde_mm_set_pd(SIMDE_FLOAT64_C(  897.84), SIMDE_FLOAT64_C(   98.86)),
1824       simde_mm_set_pd(SIMDE_FLOAT64_C(  743.55), SIMDE_FLOAT64_C( -417.08)),
1825       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  897.84), SIMDE_FLOAT64_C(   98.86)), simde_x_mm_setone_pd()) }
1826   };
1827 
1828   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1829     simde__m128d r = simde_mm_cmpnlt_sd(test_vec[i].a, test_vec[i].b);
1830     simde_assert_m128d_equal(r, test_vec[i].r);
1831   }
1832 
1833   return 0;
1834 }
1835 
1836 static int
test_simde_mm_cmple_pd(SIMDE_MUNIT_TEST_ARGS)1837 test_simde_mm_cmple_pd(SIMDE_MUNIT_TEST_ARGS) {
1838   const struct {
1839     simde__m128d a;
1840     simde__m128d b;
1841     simde__m128i r;
1842   } test_vec[8] = {
1843     { simde_mm_set_pd(SIMDE_FLOAT64_C(  543.54), SIMDE_FLOAT64_C( -463.43)),
1844       simde_mm_set_pd(SIMDE_FLOAT64_C(  803.80), SIMDE_FLOAT64_C( -383.88)),
1845       simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) },
1846     { simde_mm_set_pd(SIMDE_FLOAT64_C( -420.41), SIMDE_FLOAT64_C(  497.43)),
1847       simde_mm_set_pd(SIMDE_FLOAT64_C( -592.95), SIMDE_FLOAT64_C( -224.51)),
1848       simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1849     { simde_mm_set_pd(SIMDE_FLOAT64_C( -921.01), SIMDE_FLOAT64_C( -601.69)),
1850       simde_mm_set_pd(SIMDE_FLOAT64_C( -921.01), SIMDE_FLOAT64_C( -730.20)),
1851       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1852     { simde_mm_set_pd(SIMDE_FLOAT64_C( -456.56), SIMDE_FLOAT64_C(  380.21)),
1853       simde_mm_set_pd(SIMDE_FLOAT64_C( -456.56), SIMDE_FLOAT64_C(  380.21)),
1854       simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) },
1855     { simde_mm_set_pd(SIMDE_FLOAT64_C(  317.08), SIMDE_FLOAT64_C(  136.54)),
1856       simde_mm_set_pd(SIMDE_FLOAT64_C(  944.53), SIMDE_FLOAT64_C(  370.42)),
1857       simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) },
1858     { simde_mm_set_pd(SIMDE_FLOAT64_C( -193.09), SIMDE_FLOAT64_C(  515.21)),
1859       simde_mm_set_pd(SIMDE_FLOAT64_C(  -63.27), SIMDE_FLOAT64_C(  515.21)),
1860       simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) },
1861     { simde_mm_set_pd(SIMDE_FLOAT64_C(   14.93), SIMDE_FLOAT64_C(  166.91)),
1862       simde_mm_set_pd(SIMDE_FLOAT64_C(   14.93), SIMDE_FLOAT64_C( -633.50)),
1863       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1864     { simde_mm_set_pd(SIMDE_FLOAT64_C(  121.98), SIMDE_FLOAT64_C( -542.50)),
1865       simde_mm_set_pd(SIMDE_FLOAT64_C(  121.98), SIMDE_FLOAT64_C( -244.93)),
1866       simde_mm_set_epi64x(INT64_C(-1), INT64_C(-1)) }
1867   };
1868 
1869   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1870     simde__m128i r = simde_mm_castpd_si128(simde_mm_cmple_pd(test_vec[i].a, test_vec[i].b));
1871     simde_assert_m128i_i64(r, ==, test_vec[i].r);
1872   }
1873 
1874   return 0;
1875 }
1876 
1877 static int
test_simde_mm_cmple_sd(SIMDE_MUNIT_TEST_ARGS)1878 test_simde_mm_cmple_sd(SIMDE_MUNIT_TEST_ARGS) {
1879   const struct {
1880     simde__m128d a;
1881     simde__m128d b;
1882     simde__m128d r;
1883   } test_vec[8] = {
1884     { simde_mm_set_pd(SIMDE_FLOAT64_C(   -6.97), SIMDE_FLOAT64_C( -531.93)),
1885       simde_mm_set_pd(SIMDE_FLOAT64_C(  442.04), SIMDE_FLOAT64_C(  237.56)),
1886       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(   -6.97), SIMDE_FLOAT64_C( -531.93)), simde_x_mm_setone_pd()) },
1887     { simde_mm_set_pd(SIMDE_FLOAT64_C(  953.55), SIMDE_FLOAT64_C( -668.52)),
1888       simde_mm_set_pd(SIMDE_FLOAT64_C(   75.21), SIMDE_FLOAT64_C( -841.44)),
1889       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  953.55), SIMDE_FLOAT64_C( -668.52)), simde_mm_setzero_pd()) },
1890     { simde_mm_set_pd(SIMDE_FLOAT64_C( -962.44), SIMDE_FLOAT64_C(  733.31)),
1891       simde_mm_set_pd(SIMDE_FLOAT64_C(  366.34), SIMDE_FLOAT64_C(  744.84)),
1892       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -962.44), SIMDE_FLOAT64_C(  733.31)), simde_x_mm_setone_pd()) },
1893     { simde_mm_set_pd(SIMDE_FLOAT64_C(  546.64), SIMDE_FLOAT64_C(  333.17)),
1894       simde_mm_set_pd(SIMDE_FLOAT64_C(  540.77), SIMDE_FLOAT64_C(   -0.80)),
1895       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  546.64), SIMDE_FLOAT64_C(  333.17)), simde_mm_setzero_pd()) },
1896     { simde_mm_set_pd(SIMDE_FLOAT64_C( -407.18), SIMDE_FLOAT64_C( -763.20)),
1897       simde_mm_set_pd(SIMDE_FLOAT64_C(  973.34), SIMDE_FLOAT64_C( -496.03)),
1898       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -407.18), SIMDE_FLOAT64_C( -763.20)), simde_x_mm_setone_pd()) },
1899     { simde_mm_set_pd(SIMDE_FLOAT64_C(  407.82), SIMDE_FLOAT64_C(  479.81)),
1900       simde_mm_set_pd(SIMDE_FLOAT64_C(  198.41), SIMDE_FLOAT64_C(  710.05)),
1901       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  407.82), SIMDE_FLOAT64_C(  479.81)), simde_x_mm_setone_pd()) },
1902     { simde_mm_set_pd(SIMDE_FLOAT64_C( -282.72), SIMDE_FLOAT64_C( -348.78)),
1903       simde_mm_set_pd(SIMDE_FLOAT64_C(  165.84), SIMDE_FLOAT64_C( -951.18)),
1904       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -282.72), SIMDE_FLOAT64_C( -348.78)), simde_mm_setzero_pd()) },
1905     { simde_mm_set_pd(SIMDE_FLOAT64_C(  642.13), SIMDE_FLOAT64_C( -574.77)),
1906       simde_mm_set_pd(SIMDE_FLOAT64_C( -633.14), SIMDE_FLOAT64_C(  741.95)),
1907       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  642.13), SIMDE_FLOAT64_C( -574.77)), simde_x_mm_setone_pd()) }
1908   };
1909 
1910   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1911     simde__m128d r = simde_mm_cmple_sd(test_vec[i].a, test_vec[i].b);
1912     simde_assert_m128d_equal(r, test_vec[i].r);
1913   }
1914 
1915   return 0;
1916 }
1917 
1918 static int
test_simde_mm_cmpnle_pd(SIMDE_MUNIT_TEST_ARGS)1919 test_simde_mm_cmpnle_pd(SIMDE_MUNIT_TEST_ARGS) {
1920   const struct {
1921     simde__m128d a;
1922     simde__m128d b;
1923     simde__m128i r;
1924   } test_vec[8] = {
1925     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -50.93), SIMDE_FLOAT64_C( -877.25)),
1926       simde_mm_set_pd(SIMDE_FLOAT64_C(  -50.93), SIMDE_FLOAT64_C(   61.42)),
1927       simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1928     { simde_mm_set_pd(SIMDE_FLOAT64_C(  567.19), SIMDE_FLOAT64_C(  768.82)),
1929       simde_mm_set_pd(SIMDE_FLOAT64_C( -689.51), SIMDE_FLOAT64_C(  768.82)),
1930       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1931     { simde_mm_set_pd(SIMDE_FLOAT64_C(  625.00), SIMDE_FLOAT64_C(  979.36)),
1932       simde_mm_set_pd(SIMDE_FLOAT64_C(   59.83), SIMDE_FLOAT64_C(  979.36)),
1933       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1934     { simde_mm_set_pd(SIMDE_FLOAT64_C( -994.58), SIMDE_FLOAT64_C(  130.45)),
1935       simde_mm_set_pd(SIMDE_FLOAT64_C( -720.49), SIMDE_FLOAT64_C(  130.45)),
1936       simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1937     { simde_mm_set_pd(SIMDE_FLOAT64_C(  889.79), SIMDE_FLOAT64_C( -677.25)),
1938       simde_mm_set_pd(SIMDE_FLOAT64_C(  889.79), SIMDE_FLOAT64_C( -677.25)),
1939       simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) },
1940     { simde_mm_set_pd(SIMDE_FLOAT64_C(  802.11), SIMDE_FLOAT64_C( -926.46)),
1941       simde_mm_set_pd(SIMDE_FLOAT64_C( -136.48), SIMDE_FLOAT64_C( -926.46)),
1942       simde_mm_set_epi64x(INT64_C(-1), INT64_C( 0)) },
1943     { simde_mm_set_pd(SIMDE_FLOAT64_C( -579.79), SIMDE_FLOAT64_C(  368.31)),
1944       simde_mm_set_pd(SIMDE_FLOAT64_C( -579.79), SIMDE_FLOAT64_C( -736.86)),
1945       simde_mm_set_epi64x(INT64_C( 0), INT64_C(-1)) },
1946     { simde_mm_set_pd(SIMDE_FLOAT64_C(  408.86), SIMDE_FLOAT64_C(   63.85)),
1947       simde_mm_set_pd(SIMDE_FLOAT64_C(  408.86), SIMDE_FLOAT64_C(  878.02)),
1948       simde_mm_set_epi64x(INT64_C( 0), INT64_C( 0)) }
1949   };
1950 
1951   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1952     simde__m128i r = simde_mm_castpd_si128(simde_mm_cmpnle_pd(test_vec[i].a, test_vec[i].b));
1953     simde_assert_m128i_i64(r, ==, test_vec[i].r);
1954   }
1955 
1956   return 0;
1957 }
1958 
1959 static int
test_simde_mm_cmpnle_sd(SIMDE_MUNIT_TEST_ARGS)1960 test_simde_mm_cmpnle_sd(SIMDE_MUNIT_TEST_ARGS) {
1961   const struct {
1962     simde__m128d a;
1963     simde__m128d b;
1964     simde__m128d r;
1965   } test_vec[8] = {
1966     { simde_mm_set_pd(SIMDE_FLOAT64_C(  863.30), SIMDE_FLOAT64_C(  817.71)),
1967       simde_mm_set_pd(SIMDE_FLOAT64_C(  465.11), SIMDE_FLOAT64_C(  402.99)),
1968       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  863.30), SIMDE_FLOAT64_C(  817.71)), simde_x_mm_setone_pd()) },
1969     { simde_mm_set_pd(SIMDE_FLOAT64_C(  189.40), SIMDE_FLOAT64_C( -607.91)),
1970       simde_mm_set_pd(SIMDE_FLOAT64_C( -476.72), SIMDE_FLOAT64_C( -670.93)),
1971       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  189.40), SIMDE_FLOAT64_C( -607.91)), simde_x_mm_setone_pd()) },
1972     { simde_mm_set_pd(SIMDE_FLOAT64_C( -333.27), SIMDE_FLOAT64_C(  662.88)),
1973       simde_mm_set_pd(SIMDE_FLOAT64_C(  741.44), SIMDE_FLOAT64_C( -212.71)),
1974       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -333.27), SIMDE_FLOAT64_C(  662.88)), simde_x_mm_setone_pd()) },
1975     { simde_mm_set_pd(SIMDE_FLOAT64_C(  426.15), SIMDE_FLOAT64_C( -964.01)),
1976       simde_mm_set_pd(SIMDE_FLOAT64_C(   54.04), SIMDE_FLOAT64_C(  321.51)),
1977       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  426.15), SIMDE_FLOAT64_C( -964.01)), simde_mm_setzero_pd()) },
1978     { simde_mm_set_pd(SIMDE_FLOAT64_C( -797.48), SIMDE_FLOAT64_C(  851.48)),
1979       simde_mm_set_pd(SIMDE_FLOAT64_C(  907.15), SIMDE_FLOAT64_C(  638.76)),
1980       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -797.48), SIMDE_FLOAT64_C(  851.48)), simde_x_mm_setone_pd()) },
1981     { simde_mm_set_pd(SIMDE_FLOAT64_C(  439.23), SIMDE_FLOAT64_C(  238.01)),
1982       simde_mm_set_pd(SIMDE_FLOAT64_C(  -23.09), SIMDE_FLOAT64_C(  160.20)),
1983       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  439.23), SIMDE_FLOAT64_C(  238.01)), simde_x_mm_setone_pd()) },
1984     { simde_mm_set_pd(SIMDE_FLOAT64_C(  537.28), SIMDE_FLOAT64_C(  982.90)),
1985       simde_mm_set_pd(SIMDE_FLOAT64_C(  303.40), SIMDE_FLOAT64_C(  928.78)),
1986       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  537.28), SIMDE_FLOAT64_C(  982.90)), simde_x_mm_setone_pd()) },
1987     { simde_mm_set_pd(SIMDE_FLOAT64_C( -169.84), SIMDE_FLOAT64_C( -696.10)),
1988       simde_mm_set_pd(SIMDE_FLOAT64_C( -302.24), SIMDE_FLOAT64_C( -382.83)),
1989       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -169.84), SIMDE_FLOAT64_C( -696.10)), simde_mm_setzero_pd()) }
1990   };
1991 
1992   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
1993     simde__m128d r = simde_mm_cmpnle_sd(test_vec[i].a, test_vec[i].b);
1994     simde_assert_m128d_equal(r, test_vec[i].r);
1995   }
1996 
1997   return 0;
1998 }
1999 
2000 static int
test_simde_mm_cmpgt_epi8(SIMDE_MUNIT_TEST_ARGS)2001 test_simde_mm_cmpgt_epi8(SIMDE_MUNIT_TEST_ARGS) {
2002   const struct {
2003     simde__m128i a;
2004     simde__m128i b;
2005     simde__m128i r;
2006   } test_vec[8] = {
2007     { simde_mm_set_epi8(INT8_C(  13), INT8_C( -99), INT8_C(-128), INT8_C(  91), INT8_C( -96), INT8_C( 103), INT8_C(-104), INT8_C(-110),
2008                         INT8_C( -46), INT8_C(  -5), INT8_C(  62), INT8_C(-125), INT8_C( -51), INT8_C( -65), INT8_C(-102), INT8_C( -14)),
2009       simde_mm_set_epi8(INT8_C(  10), INT8_C( -84), INT8_C(  90), INT8_C(-110), INT8_C( 113), INT8_C( -34), INT8_C( -75), INT8_C(-110),
2010                         INT8_C( -79), INT8_C(-114), INT8_C(  26), INT8_C(-127), INT8_C(  -5), INT8_C(  -9), INT8_C(-102), INT8_C( -38)),
2011       simde_mm_set_epi8(INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(   0),
2012                         INT8_C(  -1), INT8_C(  -1), INT8_C(  -1), INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1)) },
2013     { simde_mm_set_epi8(INT8_C(-110), INT8_C( -93), INT8_C( 106), INT8_C( -55), INT8_C(  91), INT8_C( -78), INT8_C(  69), INT8_C(  62),
2014                         INT8_C(  38), INT8_C(-101), INT8_C(  86), INT8_C(-107), INT8_C( 114), INT8_C( 120), INT8_C(-118), INT8_C( 101)),
2015       simde_mm_set_epi8(INT8_C(  58), INT8_C( -88), INT8_C(  75), INT8_C( -55), INT8_C(  92), INT8_C(  51), INT8_C(-109), INT8_C(  62),
2016                         INT8_C( 123), INT8_C( -42), INT8_C(   0), INT8_C(  40), INT8_C( 114), INT8_C(-115), INT8_C(  34), INT8_C( 101)),
2017       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0),
2018                         INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(   0)) },
2019     { simde_mm_set_epi8(INT8_C(  79), INT8_C(  -2), INT8_C(-126), INT8_C(-121), INT8_C(  71), INT8_C( -59), INT8_C(  95), INT8_C(  38),
2020                         INT8_C( -95), INT8_C( 103), INT8_C( -55), INT8_C( -42), INT8_C(-124), INT8_C( -82), INT8_C( 102), INT8_C(  97)),
2021       simde_mm_set_epi8(INT8_C( -39), INT8_C( -59), INT8_C(-126), INT8_C(-107), INT8_C(-111), INT8_C( 122), INT8_C( -55), INT8_C(  87),
2022                         INT8_C( -95), INT8_C( -99), INT8_C(  56), INT8_C( 120), INT8_C( 107), INT8_C( -79), INT8_C(  -9), INT8_C( -36)),
2023       simde_mm_set_epi8(INT8_C(  -1), INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(  -1), INT8_C(   0),
2024                         INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(  -1)) },
2025     { simde_mm_set_epi8(INT8_C( -68), INT8_C(   7), INT8_C( -32), INT8_C( 120), INT8_C(-106), INT8_C(-127), INT8_C(  37), INT8_C(  95),
2026                         INT8_C( -77), INT8_C(-126), INT8_C(-111), INT8_C( -96), INT8_C(  67), INT8_C(  43), INT8_C(-123), INT8_C(  21)),
2027       simde_mm_set_epi8(INT8_C(  72), INT8_C(  68), INT8_C(  76), INT8_C( -22), INT8_C( -11), INT8_C(  34), INT8_C( 112), INT8_C(  95),
2028                         INT8_C( -77), INT8_C(  36), INT8_C( 119), INT8_C( -59), INT8_C( -49), INT8_C( -22), INT8_C(-125), INT8_C(  21)),
2029       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
2030                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(  -1), INT8_C(  -1), INT8_C(   0)) },
2031     { simde_mm_set_epi8(INT8_C( -18), INT8_C(  13), INT8_C(  66), INT8_C( -52), INT8_C( -92), INT8_C(  28), INT8_C(-122), INT8_C( -12),
2032                         INT8_C( -60), INT8_C( 125), INT8_C(-104), INT8_C(-118), INT8_C( -76), INT8_C(  42), INT8_C( -48), INT8_C(-120)),
2033       simde_mm_set_epi8(INT8_C( -17), INT8_C(  13), INT8_C(  66), INT8_C( -64), INT8_C( -92), INT8_C( 114), INT8_C(-119), INT8_C(-106),
2034                         INT8_C(  78), INT8_C(-125), INT8_C(  88), INT8_C( -88), INT8_C( 101), INT8_C(  42), INT8_C( -58), INT8_C(  -8)),
2035       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1),
2036                         INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0)) },
2037     { simde_mm_set_epi8(INT8_C(-112), INT8_C( -16), INT8_C(  96), INT8_C( -64), INT8_C(  75), INT8_C(  56), INT8_C( -96), INT8_C(  96),
2038                         INT8_C(  -8), INT8_C(  16), INT8_C(  95), INT8_C(  41), INT8_C(  62), INT8_C(  -2), INT8_C(-105), INT8_C(-101)),
2039       simde_mm_set_epi8(INT8_C(  42), INT8_C(   7), INT8_C(  90), INT8_C( -93), INT8_C(  75), INT8_C(  14), INT8_C(  -5), INT8_C(  61),
2040                         INT8_C(  -8), INT8_C( -49), INT8_C(  95), INT8_C(  82), INT8_C( -93), INT8_C( -80), INT8_C(   6), INT8_C( -48)),
2041       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(  -1), INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(  -1),
2042                         INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(  -1), INT8_C(   0), INT8_C(   0)) },
2043     { simde_mm_set_epi8(INT8_C( -56), INT8_C( -85), INT8_C(   9), INT8_C(   3), INT8_C(  32), INT8_C(-105), INT8_C(  93), INT8_C( -78),
2044                         INT8_C(-113), INT8_C(  96), INT8_C(  61), INT8_C(  14), INT8_C( -92), INT8_C(  53), INT8_C(  51), INT8_C(  -7)),
2045       simde_mm_set_epi8(INT8_C(  15), INT8_C( 100), INT8_C(   9), INT8_C(  70), INT8_C(-115), INT8_C(-105), INT8_C(  14), INT8_C( -41),
2046                         INT8_C(-113), INT8_C( -54), INT8_C( -38), INT8_C(  14), INT8_C( -53), INT8_C(   5), INT8_C(-127), INT8_C(  -7)),
2047       simde_mm_set_epi8(INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(  -1), INT8_C(   0),
2048                         INT8_C(   0), INT8_C(  -1), INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(  -1), INT8_C(   0)) },
2049     { simde_mm_set_epi8(INT8_C( 120), INT8_C(  38), INT8_C(  44), INT8_C( 103), INT8_C(  33), INT8_C( -93), INT8_C(-102), INT8_C( -46),
2050                         INT8_C(  47), INT8_C(   7), INT8_C( 120), INT8_C( 102), INT8_C( -87), INT8_C( -84), INT8_C(  92), INT8_C(  87)),
2051       simde_mm_set_epi8(INT8_C( -11), INT8_C(  89), INT8_C(  26), INT8_C(  69), INT8_C( 108), INT8_C( 127), INT8_C(-102), INT8_C(  49),
2052                         INT8_C(  53), INT8_C(  57), INT8_C( 120), INT8_C( -23), INT8_C( -87), INT8_C( -84), INT8_C( 113), INT8_C( -36)),
2053       simde_mm_set_epi8(INT8_C(  -1), INT8_C(   0), INT8_C(  -1), INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(   0),
2054                         INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1), INT8_C(   0), INT8_C(   0), INT8_C(   0), INT8_C(  -1)) }
2055   };
2056 
2057   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
2058     simde__m128i r = simde_mm_cmpgt_epi8(test_vec[i].a, test_vec[i].b);
2059     simde_assert_m128i_i8(r, ==, test_vec[i].r);
2060   }
2061 
2062   return 0;
2063 }
2064 
2065 static int
test_simde_mm_cmpgt_epi16(SIMDE_MUNIT_TEST_ARGS)2066 test_simde_mm_cmpgt_epi16(SIMDE_MUNIT_TEST_ARGS) {
2067   const struct {
2068     simde__m128i a;
2069     simde__m128i b;
2070     simde__m128i r;
2071   } test_vec[8] = {
2072     { simde_mm_set_epi16(INT16_C( 11481), INT16_C(-31028), INT16_C(-28938), INT16_C(  3434),
2073                          INT16_C(  2523), INT16_C(-16298), INT16_C(-20752), INT16_C( -3418)),
2074       simde_mm_set_epi16(INT16_C( 11481), INT16_C(-30562), INT16_C(  4762), INT16_C( -6519),
2075                          INT16_C(  2523), INT16_C(  9845), INT16_C(   -18), INT16_C( -5787)),
2076       simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(    -1),
2077                          INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(    -1)) },
2078     { simde_mm_set_epi16(INT16_C( -3487), INT16_C( -2281), INT16_C(  2722), INT16_C(-23699),
2079                          INT16_C( -5087), INT16_C( 24907), INT16_C( 26126), INT16_C( 26357)),
2080       simde_mm_set_epi16(INT16_C( 32178), INT16_C(-24562), INT16_C( -3261), INT16_C(-23699),
2081                          INT16_C(  2431), INT16_C(-16600), INT16_C( -5679), INT16_C(-12625)),
2082       simde_mm_set_epi16(INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(     0),
2083                          INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(    -1)) },
2084     { simde_mm_set_epi16(INT16_C( 28908), INT16_C( -8639), INT16_C(-27999), INT16_C(-19726),
2085                          INT16_C( 28446), INT16_C(  -947), INT16_C( -9756), INT16_C(-32088)),
2086       simde_mm_set_epi16(INT16_C(-24056), INT16_C(-13026), INT16_C(-27999), INT16_C( 27584),
2087                          INT16_C(-22292), INT16_C( 18403), INT16_C(-15329), INT16_C( 30515)),
2088       simde_mm_set_epi16(INT16_C(    -1), INT16_C(    -1), INT16_C(     0), INT16_C(     0),
2089                          INT16_C(    -1), INT16_C(     0), INT16_C(    -1), INT16_C(     0)) },
2090     { simde_mm_set_epi16(INT16_C( 32500), INT16_C( 28770), INT16_C(-12789), INT16_C(   764),
2091                          INT16_C(-17186), INT16_C(  5823), INT16_C(  5923), INT16_C(-14898)),
2092       simde_mm_set_epi16(INT16_C(  5264), INT16_C(-27897), INT16_C(-22472), INT16_C(-17764),
2093                          INT16_C( 20191), INT16_C( 20077), INT16_C(-20539), INT16_C( -7345)),
2094       simde_mm_set_epi16(INT16_C(    -1), INT16_C(    -1), INT16_C(    -1), INT16_C(    -1),
2095                          INT16_C(     0), INT16_C(     0), INT16_C(    -1), INT16_C(     0)) },
2096     { simde_mm_set_epi16(INT16_C(-32420), INT16_C(-10018), INT16_C( 10034), INT16_C( 21195),
2097                          INT16_C( 23576), INT16_C( 23578), INT16_C( 27261), INT16_C( 22728)),
2098       simde_mm_set_epi16(INT16_C(-22785), INT16_C(  9581), INT16_C( -7653), INT16_C(-22519),
2099                          INT16_C(  2089), INT16_C( 10927), INT16_C( 31136), INT16_C( 28081)),
2100       simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(    -1), INT16_C(    -1),
2101                          INT16_C(    -1), INT16_C(    -1), INT16_C(     0), INT16_C(     0)) },
2102     { simde_mm_set_epi16(INT16_C( -8613), INT16_C( 14814), INT16_C( 25977), INT16_C(-32026),
2103                          INT16_C(-14164), INT16_C( 15788), INT16_C( 26276), INT16_C(-23351)),
2104       simde_mm_set_epi16(INT16_C( 18907), INT16_C( 31050), INT16_C( 25483), INT16_C( -1544),
2105                          INT16_C(-22377), INT16_C(-30002), INT16_C( 26276), INT16_C(-21368)),
2106       simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(    -1), INT16_C(     0),
2107                          INT16_C(    -1), INT16_C(    -1), INT16_C(     0), INT16_C(     0)) },
2108     { simde_mm_set_epi16(INT16_C( -8616), INT16_C( 18300), INT16_C(-13448), INT16_C(-25384),
2109                          INT16_C(-20778), INT16_C(  9404), INT16_C( 18457), INT16_C(-13013)),
2110       simde_mm_set_epi16(INT16_C( 28965), INT16_C(-22807), INT16_C( 20081), INT16_C(-25384),
2111                          INT16_C( 21664), INT16_C(-19420), INT16_C(-10494), INT16_C(  8092)),
2112       simde_mm_set_epi16(INT16_C(     0), INT16_C(    -1), INT16_C(     0), INT16_C(     0),
2113                          INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(     0)) },
2114     { simde_mm_set_epi16(INT16_C(-19643), INT16_C( 19578), INT16_C(-31344), INT16_C(-10120),
2115                          INT16_C( -1042), INT16_C( 26214), INT16_C(  7476), INT16_C( 19171)),
2116       simde_mm_set_epi16(INT16_C(  3338), INT16_C(-31811), INT16_C( 23264), INT16_C( 16135),
2117                          INT16_C( 10963), INT16_C( 28585), INT16_C( 10267), INT16_C( 15982)),
2118       simde_mm_set_epi16(INT16_C(     0), INT16_C(    -1), INT16_C(     0), INT16_C(     0),
2119                          INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(    -1)) }
2120   };
2121 
2122   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2123     simde__m128i r = simde_mm_cmpgt_epi16(test_vec[i].a, test_vec[i].b);
2124     simde_assert_m128i_i16(r, ==, test_vec[i].r);
2125   }
2126 
2127   return 0;
2128 }
2129 
2130 static int
test_simde_mm_cmpgt_epi32(SIMDE_MUNIT_TEST_ARGS)2131 test_simde_mm_cmpgt_epi32(SIMDE_MUNIT_TEST_ARGS) {
2132   const struct {
2133     simde__m128i a;
2134     simde__m128i b;
2135     simde__m128i r;
2136   } test_vec[8] = {
2137     { simde_mm_set_epi32(  752453324, -1896477334,   165396566, -1359940954),
2138       simde_mm_set_epi32(  752453324,   312141449, -1431099787,    -1119899),
2139       simde_mm_set_epi32(          0,           0,          -1,           0) },
2140     { simde_mm_set_epi32(  107153560,  1681238316, -2021152487, -1327623679),
2141       simde_mm_set_epi32( -228460777,   178430829,  -333356725,  1712219893),
2142       simde_mm_set_epi32(         -1,          -1,           0,           0) },
2143     { simde_mm_set_epi32( -899341348, -1183976764,    50756911,  -774436817),
2144       simde_mm_set_epi32( -899341348, -1675909702,    50756911,   393145285),
2145       simde_mm_set_epi32(          0,          -1,           0,           0) },
2146     { simde_mm_set_epi32(-1576481506,   693332928, -1460910109, -1004570829),
2147       simde_mm_set_epi32(-1038801032, -1159952439, -1460910109,   -43665635),
2148       simde_mm_set_epi32(          0,          -1,           0,           0) },
2149     { simde_mm_set_epi32( 2129948770,  -838139140, -1126295873,   388220366),
2150       simde_mm_set_epi32(  345019143, -1472677220,  1323257453, -1345985713),
2151       simde_mm_set_epi32(         -1,          -1,           0,          -1) },
2152     { simde_mm_set_epi32(  324758156,  1228690576, -1773311089,   254589418),
2153       simde_mm_set_epi32(-2124621602,  1228690576,  1545100314,  1786599624),
2154       simde_mm_set_epi32(         -1,           0,           0,           0) },
2155     { simde_mm_set_epi32(-1939857174,   351576089,    62939556, -1061610170),
2156       simde_mm_set_epi32(-1899113305,  1851167226,    62939556, -2109881445),
2157       simde_mm_set_epi32(          0,           0,           0,          -1) },
2158     { simde_mm_set_epi32( 1239120202,  1670117880, -1466463538,  1932307592),
2159       simde_mm_set_epi32( 1694384857,    79202881,  -114087446,  -617386644),
2160       simde_mm_set_epi32(          0,          -1,           0,          -1) }
2161   };
2162 
2163   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2164     simde__m128i r = simde_mm_cmpgt_epi32(test_vec[i].a, test_vec[i].b);
2165     simde_assert_m128i_i32(r, ==, test_vec[i].r);
2166   }
2167 
2168   return 0;
2169 }
2170 
2171 static int
test_simde_mm_cmpgt_pd(SIMDE_MUNIT_TEST_ARGS)2172 test_simde_mm_cmpgt_pd(SIMDE_MUNIT_TEST_ARGS) {
2173   const struct {
2174     simde__m128d a;
2175     simde__m128d b;
2176     simde__m128d r;
2177   } test_vec[8] = {
2178     { simde_mm_set_pd(SIMDE_FLOAT64_C( -649.61), SIMDE_FLOAT64_C(  366.73)),
2179       simde_mm_set_pd(SIMDE_FLOAT64_C(  333.59), SIMDE_FLOAT64_C(  116.88)),
2180       simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2181     { simde_mm_set_pd(SIMDE_FLOAT64_C( -619.22), SIMDE_FLOAT64_C( -854.65)),
2182       simde_mm_set_pd(SIMDE_FLOAT64_C( -854.79), SIMDE_FLOAT64_C(  863.33)),
2183       simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2184     { simde_mm_set_pd(SIMDE_FLOAT64_C( -950.10), SIMDE_FLOAT64_C(  381.78)),
2185       simde_mm_set_pd(SIMDE_FLOAT64_C(  844.77), SIMDE_FLOAT64_C( -217.11)),
2186       simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2187     { simde_mm_set_pd(SIMDE_FLOAT64_C( -925.79), SIMDE_FLOAT64_C( -916.91)),
2188       simde_mm_set_pd(SIMDE_FLOAT64_C(  -17.99), SIMDE_FLOAT64_C(  826.72)),
2189       simde_mm_setzero_pd() },
2190     { simde_mm_set_pd(SIMDE_FLOAT64_C(  581.21), SIMDE_FLOAT64_C(  639.37)),
2191       simde_mm_set_pd(SIMDE_FLOAT64_C(  581.21), SIMDE_FLOAT64_C(  448.67)),
2192       simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2193     { simde_mm_set_pd(SIMDE_FLOAT64_C(  702.29), SIMDE_FLOAT64_C( -582.84)),
2194       simde_mm_set_pd(SIMDE_FLOAT64_C(  702.29), SIMDE_FLOAT64_C(  186.24)),
2195       simde_mm_setzero_pd() },
2196     { simde_mm_set_pd(SIMDE_FLOAT64_C(  532.21), SIMDE_FLOAT64_C(  145.56)),
2197       simde_mm_set_pd(SIMDE_FLOAT64_C( -677.14), SIMDE_FLOAT64_C(  145.56)),
2198       simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2199     { simde_mm_set_pd(SIMDE_FLOAT64_C(  459.86), SIMDE_FLOAT64_C(  265.89)),
2200       simde_mm_set_pd(SIMDE_FLOAT64_C( -130.43), SIMDE_FLOAT64_C(  334.48)),
2201       simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }
2202   };
2203 
2204   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2205     simde__m128d r = simde_mm_cmpgt_pd(test_vec[i].a, test_vec[i].b);
2206     simde_assert_m128d_equal(r, test_vec[i].r);
2207   }
2208 
2209   return 0;
2210 }
2211 
2212 static int
test_simde_mm_cmpgt_sd(SIMDE_MUNIT_TEST_ARGS)2213 test_simde_mm_cmpgt_sd(SIMDE_MUNIT_TEST_ARGS) {
2214   const struct {
2215     simde__m128d a;
2216     simde__m128d b;
2217     simde__m128d r;
2218   } test_vec[8] = {
2219     { simde_mm_set_pd(SIMDE_FLOAT64_C(  482.46), SIMDE_FLOAT64_C(   39.32)),
2220       simde_mm_set_pd(SIMDE_FLOAT64_C(  175.75), SIMDE_FLOAT64_C( -451.08)),
2221       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  482.46), SIMDE_FLOAT64_C(   39.32)), simde_x_mm_setone_pd()) },
2222     { simde_mm_set_pd(SIMDE_FLOAT64_C(  850.79), SIMDE_FLOAT64_C(  999.92)),
2223       simde_mm_set_pd(SIMDE_FLOAT64_C( -978.35), SIMDE_FLOAT64_C(  216.37)),
2224       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  850.79), SIMDE_FLOAT64_C(  999.92)), simde_x_mm_setone_pd()) },
2225     { simde_mm_set_pd(SIMDE_FLOAT64_C( -218.27), SIMDE_FLOAT64_C(  952.36)),
2226       simde_mm_set_pd(SIMDE_FLOAT64_C( -402.87), SIMDE_FLOAT64_C( -852.22)),
2227       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -218.27), SIMDE_FLOAT64_C(  952.36)), simde_x_mm_setone_pd()) },
2228     { simde_mm_set_pd(SIMDE_FLOAT64_C( -324.97), SIMDE_FLOAT64_C(  -18.67)),
2229       simde_mm_set_pd(SIMDE_FLOAT64_C( -602.36), SIMDE_FLOAT64_C(  488.60)),
2230       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -324.97), SIMDE_FLOAT64_C(  -18.67)), simde_mm_setzero_pd()) },
2231     { simde_mm_set_pd(SIMDE_FLOAT64_C( -224.88), SIMDE_FLOAT64_C(  278.88)),
2232       simde_mm_set_pd(SIMDE_FLOAT64_C(  861.73), SIMDE_FLOAT64_C( -326.54)),
2233       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -224.88), SIMDE_FLOAT64_C(  278.88)), simde_x_mm_setone_pd()) },
2234     { simde_mm_set_pd(SIMDE_FLOAT64_C( -160.74), SIMDE_FLOAT64_C(  611.30)),
2235       simde_mm_set_pd(SIMDE_FLOAT64_C(  370.13), SIMDE_FLOAT64_C(   18.16)),
2236       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -160.74), SIMDE_FLOAT64_C(  611.30)), simde_x_mm_setone_pd()) },
2237     { simde_mm_set_pd(SIMDE_FLOAT64_C(  914.20), SIMDE_FLOAT64_C(  278.69)),
2238       simde_mm_set_pd(SIMDE_FLOAT64_C(  703.64), SIMDE_FLOAT64_C( -975.84)),
2239       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  914.20), SIMDE_FLOAT64_C(  278.69)), simde_x_mm_setone_pd()) },
2240     { simde_mm_set_pd(SIMDE_FLOAT64_C(  392.89), SIMDE_FLOAT64_C(   45.41)),
2241       simde_mm_set_pd(SIMDE_FLOAT64_C(  713.78), SIMDE_FLOAT64_C(   -6.71)),
2242       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  392.89), SIMDE_FLOAT64_C(   45.41)), simde_x_mm_setone_pd()) }
2243   };
2244 
2245   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2246     simde__m128d r = simde_mm_cmpgt_sd(test_vec[i].a, test_vec[i].b);
2247     simde_assert_m128d_equal(r, test_vec[i].r);
2248   }
2249 
2250   return 0;
2251 }
2252 
2253 static int
test_simde_mm_cmpngt_pd(SIMDE_MUNIT_TEST_ARGS)2254 test_simde_mm_cmpngt_pd(SIMDE_MUNIT_TEST_ARGS) {
2255   const struct {
2256     simde__m128d a;
2257     simde__m128d b;
2258     simde__m128d r;
2259   } test_vec[8] = {
2260     { simde_mm_set_pd(SIMDE_FLOAT64_C(   65.48), SIMDE_FLOAT64_C( -195.60)),
2261       simde_mm_set_pd(SIMDE_FLOAT64_C(   65.48), SIMDE_FLOAT64_C(   18.27)),
2262       simde_mm_move_sd(simde_x_mm_setone_pd(), simde_x_mm_setone_pd()) },
2263     { simde_mm_set_pd(SIMDE_FLOAT64_C( -563.65), SIMDE_FLOAT64_C(  884.03)),
2264       simde_mm_set_pd(SIMDE_FLOAT64_C(  467.71), SIMDE_FLOAT64_C( -906.63)),
2265       simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2266     { simde_mm_set_pd(SIMDE_FLOAT64_C( -553.85), SIMDE_FLOAT64_C(   49.09)),
2267       simde_mm_set_pd(SIMDE_FLOAT64_C(  731.88), SIMDE_FLOAT64_C(  974.91)),
2268       simde_x_mm_setone_pd() },
2269     { simde_mm_set_pd(SIMDE_FLOAT64_C(  492.98), SIMDE_FLOAT64_C(   64.21)),
2270       simde_mm_set_pd(SIMDE_FLOAT64_C( -392.36), SIMDE_FLOAT64_C( -188.43)),
2271       simde_mm_setzero_pd() },
2272     { simde_mm_set_pd(SIMDE_FLOAT64_C( -248.75), SIMDE_FLOAT64_C( -404.39)),
2273       simde_mm_set_pd(SIMDE_FLOAT64_C( -495.92), SIMDE_FLOAT64_C( -819.81)),
2274       simde_mm_setzero_pd() },
2275     { simde_mm_set_pd(SIMDE_FLOAT64_C( -932.57), SIMDE_FLOAT64_C(  741.27)),
2276       simde_mm_set_pd(SIMDE_FLOAT64_C( -307.42), SIMDE_FLOAT64_C(  170.69)),
2277       simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2278     { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.12), SIMDE_FLOAT64_C( -161.45)),
2279       simde_mm_set_pd(SIMDE_FLOAT64_C( -939.12), SIMDE_FLOAT64_C( -161.45)),
2280       simde_x_mm_setone_pd() },
2281     { simde_mm_set_pd(SIMDE_FLOAT64_C( -900.20), SIMDE_FLOAT64_C( -314.63)),
2282       simde_mm_set_pd(SIMDE_FLOAT64_C(  138.12), SIMDE_FLOAT64_C(  517.19)),
2283       simde_x_mm_setone_pd() }
2284   };
2285 
2286   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2287     simde__m128d r = simde_mm_cmpngt_pd(test_vec[i].a, test_vec[i].b);
2288     simde_assert_m128d_equal(r, test_vec[i].r);
2289   }
2290 
2291   return 0;
2292 }
2293 
2294 static int
test_simde_mm_cmpngt_sd(SIMDE_MUNIT_TEST_ARGS)2295 test_simde_mm_cmpngt_sd(SIMDE_MUNIT_TEST_ARGS) {
2296   const struct {
2297     simde__m128d a;
2298     simde__m128d b;
2299     simde__m128d r;
2300   } test_vec[8] = {
2301     { simde_mm_set_pd(SIMDE_FLOAT64_C( -695.66), SIMDE_FLOAT64_C(  678.35)),
2302       simde_mm_set_pd(SIMDE_FLOAT64_C(  356.43), SIMDE_FLOAT64_C(  495.31)),
2303       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -695.66), SIMDE_FLOAT64_C(  678.35)), simde_mm_setzero_pd()) },
2304     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -72.63), SIMDE_FLOAT64_C(  895.56)),
2305       simde_mm_set_pd(SIMDE_FLOAT64_C( -885.88), SIMDE_FLOAT64_C(  947.04)),
2306       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  -72.63), SIMDE_FLOAT64_C(  895.56)), simde_x_mm_setone_pd()) },
2307     { simde_mm_set_pd(SIMDE_FLOAT64_C(   72.92), SIMDE_FLOAT64_C( -711.12)),
2308       simde_mm_set_pd(SIMDE_FLOAT64_C( -242.49), SIMDE_FLOAT64_C( -686.51)),
2309       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(   72.92), SIMDE_FLOAT64_C( -711.12)), simde_x_mm_setone_pd()) },
2310     { simde_mm_set_pd(SIMDE_FLOAT64_C(  520.17), SIMDE_FLOAT64_C(  176.32)),
2311       simde_mm_set_pd(SIMDE_FLOAT64_C( -442.78), SIMDE_FLOAT64_C( -956.19)),
2312       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  520.17), SIMDE_FLOAT64_C(  176.32)), simde_mm_setzero_pd()) },
2313     { simde_mm_set_pd(SIMDE_FLOAT64_C(  949.11), SIMDE_FLOAT64_C(  112.35)),
2314       simde_mm_set_pd(SIMDE_FLOAT64_C( -212.07), SIMDE_FLOAT64_C(  851.84)),
2315       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  949.11), SIMDE_FLOAT64_C(  112.35)), simde_x_mm_setone_pd()) },
2316     { simde_mm_set_pd(SIMDE_FLOAT64_C( -142.01), SIMDE_FLOAT64_C( -216.70)),
2317       simde_mm_set_pd(SIMDE_FLOAT64_C( -467.63), SIMDE_FLOAT64_C(  481.36)),
2318       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -142.01), SIMDE_FLOAT64_C( -216.70)), simde_x_mm_setone_pd()) },
2319     { simde_mm_set_pd(SIMDE_FLOAT64_C(  918.90), SIMDE_FLOAT64_C(  481.59)),
2320       simde_mm_set_pd(SIMDE_FLOAT64_C( -147.11), SIMDE_FLOAT64_C(  677.03)),
2321       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  918.90), SIMDE_FLOAT64_C(  481.59)), simde_x_mm_setone_pd()) },
2322     { simde_mm_set_pd(SIMDE_FLOAT64_C( -143.49), SIMDE_FLOAT64_C(  447.22)),
2323       simde_mm_set_pd(SIMDE_FLOAT64_C(   50.06), SIMDE_FLOAT64_C(  827.25)),
2324       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -143.49), SIMDE_FLOAT64_C(  447.22)), simde_x_mm_setone_pd()) }
2325   };
2326 
2327   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2328     simde__m128d r = simde_mm_cmpngt_sd(test_vec[i].a, test_vec[i].b);
2329     simde_assert_m128d_equal(r, test_vec[i].r);
2330   }
2331 
2332   return 0;
2333 }
2334 
2335 static int
test_simde_mm_cmpge_pd(SIMDE_MUNIT_TEST_ARGS)2336 test_simde_mm_cmpge_pd(SIMDE_MUNIT_TEST_ARGS) {
2337   const struct {
2338     simde__m128d a;
2339     simde__m128d b;
2340     simde__m128d r;
2341   } test_vec[8] = {
2342     { simde_mm_set_pd(SIMDE_FLOAT64_C( -649.61), SIMDE_FLOAT64_C(  366.73)),
2343       simde_mm_set_pd(SIMDE_FLOAT64_C(  333.59), SIMDE_FLOAT64_C(  116.88)),
2344       simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2345     { simde_mm_set_pd(SIMDE_FLOAT64_C( -619.22), SIMDE_FLOAT64_C( -854.65)),
2346       simde_mm_set_pd(SIMDE_FLOAT64_C( -854.79), SIMDE_FLOAT64_C(  863.33)),
2347       simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2348     { simde_mm_set_pd(SIMDE_FLOAT64_C( -950.10), SIMDE_FLOAT64_C(  381.78)),
2349       simde_mm_set_pd(SIMDE_FLOAT64_C(  844.77), SIMDE_FLOAT64_C( -217.11)),
2350       simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2351     { simde_mm_set_pd(SIMDE_FLOAT64_C( -925.79), SIMDE_FLOAT64_C( -916.91)),
2352       simde_mm_set_pd(SIMDE_FLOAT64_C(  -17.99), SIMDE_FLOAT64_C(  826.72)),
2353       simde_mm_setzero_pd() },
2354     { simde_mm_set_pd(SIMDE_FLOAT64_C(  581.21), SIMDE_FLOAT64_C(  639.37)),
2355       simde_mm_set_pd(SIMDE_FLOAT64_C(  581.21), SIMDE_FLOAT64_C(  448.67)),
2356       simde_x_mm_setone_pd() },
2357     { simde_mm_set_pd(SIMDE_FLOAT64_C(  702.29), SIMDE_FLOAT64_C( -582.84)),
2358       simde_mm_set_pd(SIMDE_FLOAT64_C(  702.29), SIMDE_FLOAT64_C(  186.24)),
2359       simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2360     { simde_mm_set_pd(SIMDE_FLOAT64_C(  532.21), SIMDE_FLOAT64_C(  145.56)),
2361       simde_mm_set_pd(SIMDE_FLOAT64_C( -677.14), SIMDE_FLOAT64_C(  145.56)),
2362       simde_x_mm_setone_pd() },
2363     { simde_mm_set_pd(SIMDE_FLOAT64_C(  459.86), SIMDE_FLOAT64_C(  265.89)),
2364       simde_mm_set_pd(SIMDE_FLOAT64_C( -130.43), SIMDE_FLOAT64_C(  334.48)),
2365       simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) }
2366   };
2367 
2368   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2369     simde__m128d r = simde_mm_cmpge_pd(test_vec[i].a, test_vec[i].b);
2370     simde_assert_m128d_equal(r, test_vec[i].r);
2371   }
2372 
2373   return 0;
2374 }
2375 
2376 static int
test_simde_mm_cmpge_sd(SIMDE_MUNIT_TEST_ARGS)2377 test_simde_mm_cmpge_sd(SIMDE_MUNIT_TEST_ARGS) {
2378   const struct {
2379     simde__m128d a;
2380     simde__m128d b;
2381     simde__m128d r;
2382   } test_vec[8] = {
2383     { simde_mm_set_pd(SIMDE_FLOAT64_C( -315.45), SIMDE_FLOAT64_C(  193.79)),
2384       simde_mm_set_pd(SIMDE_FLOAT64_C( -204.45), SIMDE_FLOAT64_C(  887.13)),
2385       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -315.45), SIMDE_FLOAT64_C(  193.79)), simde_mm_setzero_pd()) },
2386     { simde_mm_set_pd(SIMDE_FLOAT64_C( -670.47), SIMDE_FLOAT64_C(  937.31)),
2387       simde_mm_set_pd(SIMDE_FLOAT64_C(  343.22), SIMDE_FLOAT64_C( -308.01)),
2388       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -670.47), SIMDE_FLOAT64_C(  937.31)), simde_x_mm_setone_pd()) },
2389     { simde_mm_set_pd(SIMDE_FLOAT64_C( -903.63), SIMDE_FLOAT64_C( -850.53)),
2390       simde_mm_set_pd(SIMDE_FLOAT64_C( -838.64), SIMDE_FLOAT64_C( -936.46)),
2391       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -903.63), SIMDE_FLOAT64_C( -850.53)), simde_x_mm_setone_pd()) },
2392     { simde_mm_set_pd(SIMDE_FLOAT64_C(  236.33), SIMDE_FLOAT64_C(  126.98)),
2393       simde_mm_set_pd(SIMDE_FLOAT64_C(  872.82), SIMDE_FLOAT64_C( -512.42)),
2394       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  236.33), SIMDE_FLOAT64_C(  126.98)), simde_x_mm_setone_pd()) },
2395     { simde_mm_set_pd(SIMDE_FLOAT64_C(  811.87), SIMDE_FLOAT64_C(  -15.62)),
2396       simde_mm_set_pd(SIMDE_FLOAT64_C( -983.99), SIMDE_FLOAT64_C(  351.32)),
2397       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  811.87), SIMDE_FLOAT64_C(  -15.62)), simde_mm_setzero_pd()) },
2398     { simde_mm_set_pd(SIMDE_FLOAT64_C(   42.47), SIMDE_FLOAT64_C( -523.00)),
2399       simde_mm_set_pd(SIMDE_FLOAT64_C(  286.68), SIMDE_FLOAT64_C(  254.00)),
2400       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(   42.47), SIMDE_FLOAT64_C( -523.00)), simde_mm_setzero_pd()) },
2401     { simde_mm_set_pd(SIMDE_FLOAT64_C( -219.91), SIMDE_FLOAT64_C( -253.29)),
2402       simde_mm_set_pd(SIMDE_FLOAT64_C( -554.73), SIMDE_FLOAT64_C(  225.44)),
2403       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -219.91), SIMDE_FLOAT64_C( -253.29)), simde_mm_setzero_pd()) },
2404     { simde_mm_set_pd(SIMDE_FLOAT64_C( -901.30), SIMDE_FLOAT64_C( -538.38)),
2405       simde_mm_set_pd(SIMDE_FLOAT64_C( -584.99), SIMDE_FLOAT64_C(   91.26)),
2406       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -901.30), SIMDE_FLOAT64_C( -538.38)), simde_mm_setzero_pd()) }
2407   };
2408 
2409   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2410     simde__m128d r = simde_mm_cmpge_sd(test_vec[i].a, test_vec[i].b);
2411     simde_assert_m128d_equal(r, test_vec[i].r);
2412   }
2413 
2414   return 0;
2415 }
2416 
2417 static int
test_simde_mm_cmpnge_pd(SIMDE_MUNIT_TEST_ARGS)2418 test_simde_mm_cmpnge_pd(SIMDE_MUNIT_TEST_ARGS) {
2419   const struct {
2420     simde__m128d a;
2421     simde__m128d b;
2422     simde__m128d r;
2423   } test_vec[8] = {
2424     { simde_mm_set_pd(SIMDE_FLOAT64_C(   65.48), SIMDE_FLOAT64_C( -195.60)),
2425       simde_mm_set_pd(SIMDE_FLOAT64_C(   65.48), SIMDE_FLOAT64_C(   18.27)),
2426       simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2427     { simde_mm_set_pd(SIMDE_FLOAT64_C( -563.65), SIMDE_FLOAT64_C(  884.03)),
2428       simde_mm_set_pd(SIMDE_FLOAT64_C(  467.71), SIMDE_FLOAT64_C( -906.63)),
2429       simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2430     { simde_mm_set_pd(SIMDE_FLOAT64_C( -553.85), SIMDE_FLOAT64_C(   49.09)),
2431       simde_mm_set_pd(SIMDE_FLOAT64_C(  731.88), SIMDE_FLOAT64_C(  974.91)),
2432       simde_x_mm_setone_pd() },
2433     { simde_mm_set_pd(SIMDE_FLOAT64_C(  492.98), SIMDE_FLOAT64_C(   64.21)),
2434       simde_mm_set_pd(SIMDE_FLOAT64_C( -392.36), SIMDE_FLOAT64_C( -188.43)),
2435       simde_mm_setzero_pd() },
2436     { simde_mm_set_pd(SIMDE_FLOAT64_C( -248.75), SIMDE_FLOAT64_C( -404.39)),
2437       simde_mm_set_pd(SIMDE_FLOAT64_C( -495.92), SIMDE_FLOAT64_C( -819.81)),
2438       simde_mm_setzero_pd() },
2439     { simde_mm_set_pd(SIMDE_FLOAT64_C( -932.57), SIMDE_FLOAT64_C(  741.27)),
2440       simde_mm_set_pd(SIMDE_FLOAT64_C( -307.42), SIMDE_FLOAT64_C(  170.69)),
2441       simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2442     { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.12), SIMDE_FLOAT64_C( -161.45)),
2443       simde_mm_set_pd(SIMDE_FLOAT64_C( -939.12), SIMDE_FLOAT64_C( -161.45)),
2444       simde_mm_setzero_pd() },
2445     { simde_mm_set_pd(SIMDE_FLOAT64_C( -900.20), SIMDE_FLOAT64_C( -314.63)),
2446       simde_mm_set_pd(SIMDE_FLOAT64_C(  138.12), SIMDE_FLOAT64_C(  517.19)),
2447       simde_x_mm_setone_pd() }
2448   };
2449 
2450   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2451     simde__m128d r = simde_mm_cmpnge_pd(test_vec[i].a, test_vec[i].b);
2452     simde_assert_m128d_equal(r, test_vec[i].r);
2453   }
2454 
2455   return 0;
2456 }
2457 
2458 static int
test_simde_mm_cmpnge_sd(SIMDE_MUNIT_TEST_ARGS)2459 test_simde_mm_cmpnge_sd(SIMDE_MUNIT_TEST_ARGS) {
2460   const struct {
2461     simde__m128d a;
2462     simde__m128d b;
2463     simde__m128d r;
2464   } test_vec[8] = {
2465     { simde_mm_set_pd(SIMDE_FLOAT64_C( -695.66), SIMDE_FLOAT64_C(  678.35)),
2466       simde_mm_set_pd(SIMDE_FLOAT64_C(  356.43), SIMDE_FLOAT64_C(  495.31)),
2467       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -695.66), SIMDE_FLOAT64_C(  678.35)), simde_mm_setzero_pd()) },
2468     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -72.63), SIMDE_FLOAT64_C(  895.56)),
2469       simde_mm_set_pd(SIMDE_FLOAT64_C( -885.88), SIMDE_FLOAT64_C(  947.04)),
2470       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  -72.63), SIMDE_FLOAT64_C(  895.56)), simde_x_mm_setone_pd()) },
2471     { simde_mm_set_pd(SIMDE_FLOAT64_C(   72.92), SIMDE_FLOAT64_C( -711.12)),
2472       simde_mm_set_pd(SIMDE_FLOAT64_C( -242.49), SIMDE_FLOAT64_C( -686.51)),
2473       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(   72.92), SIMDE_FLOAT64_C( -711.12)), simde_x_mm_setone_pd()) },
2474     { simde_mm_set_pd(SIMDE_FLOAT64_C(  520.17), SIMDE_FLOAT64_C(  176.32)),
2475       simde_mm_set_pd(SIMDE_FLOAT64_C( -442.78), SIMDE_FLOAT64_C( -956.19)),
2476       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  520.17), SIMDE_FLOAT64_C(  176.32)), simde_mm_setzero_pd()) },
2477     { simde_mm_set_pd(SIMDE_FLOAT64_C(  949.11), SIMDE_FLOAT64_C(  112.35)),
2478       simde_mm_set_pd(SIMDE_FLOAT64_C( -212.07), SIMDE_FLOAT64_C(  851.84)),
2479       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  949.11), SIMDE_FLOAT64_C(  112.35)), simde_x_mm_setone_pd()) },
2480     { simde_mm_set_pd(SIMDE_FLOAT64_C( -142.01), SIMDE_FLOAT64_C( -216.70)),
2481       simde_mm_set_pd(SIMDE_FLOAT64_C( -467.63), SIMDE_FLOAT64_C(  481.36)),
2482       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -142.01), SIMDE_FLOAT64_C( -216.70)), simde_x_mm_setone_pd()) },
2483     { simde_mm_set_pd(SIMDE_FLOAT64_C(  918.90), SIMDE_FLOAT64_C(  481.59)),
2484       simde_mm_set_pd(SIMDE_FLOAT64_C( -147.11), SIMDE_FLOAT64_C(  677.03)),
2485       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  918.90), SIMDE_FLOAT64_C(  481.59)), simde_x_mm_setone_pd()) },
2486     { simde_mm_set_pd(SIMDE_FLOAT64_C( -143.49), SIMDE_FLOAT64_C(  447.22)),
2487       simde_mm_set_pd(SIMDE_FLOAT64_C(   50.06), SIMDE_FLOAT64_C(  827.25)),
2488       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -143.49), SIMDE_FLOAT64_C(  447.22)), simde_x_mm_setone_pd()) }
2489   };
2490 
2491   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2492     simde__m128d r = simde_mm_cmpnge_sd(test_vec[i].a, test_vec[i].b);
2493     simde_assert_m128d_equal(r, test_vec[i].r);
2494   }
2495 
2496   return 0;
2497 }
2498 
2499 static int
test_simde_mm_cmpord_pd(SIMDE_MUNIT_TEST_ARGS)2500 test_simde_mm_cmpord_pd(SIMDE_MUNIT_TEST_ARGS) {
2501   const struct {
2502     simde__m128d a;
2503     simde__m128d b;
2504     simde__m128d r;
2505   } test_vec[8] = {
2506     { simde_mm_set_pd(SIMDE_FLOAT64_C(  107.72), SIMDE_FLOAT64_C( -915.48)),
2507       simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -303.84)),
2508       simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2509     { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  173.20)),
2510       simde_mm_set_pd(SIMDE_FLOAT64_C( -817.33), SIMDE_FLOAT64_C(  659.40)),
2511       simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2512     { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_MATH_NAN),
2513       simde_mm_set_pd(SIMDE_FLOAT64_C( -425.32), SIMDE_FLOAT64_C(  993.95)),
2514       simde_mm_setzero_pd() },
2515     { simde_mm_set_pd(SIMDE_FLOAT64_C( -650.75), SIMDE_MATH_NAN),
2516       simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -971.81)),
2517       simde_mm_setzero_pd() },
2518     { simde_mm_set_pd(SIMDE_FLOAT64_C( -761.29), SIMDE_FLOAT64_C( -694.76)),
2519       simde_mm_set_pd(SIMDE_FLOAT64_C( -709.09), SIMDE_FLOAT64_C(  614.12)),
2520       simde_x_mm_setone_pd() },
2521     { simde_mm_set_pd(SIMDE_FLOAT64_C(  498.19), SIMDE_FLOAT64_C( -379.74)),
2522       simde_mm_set_pd(SIMDE_FLOAT64_C( -247.48), SIMDE_FLOAT64_C( -578.21)),
2523       simde_x_mm_setone_pd() },
2524     { simde_mm_set_pd(SIMDE_FLOAT64_C(  101.51), SIMDE_FLOAT64_C(  387.46)),
2525       simde_mm_set_pd(SIMDE_FLOAT64_C(  215.97), SIMDE_FLOAT64_C(  173.76)),
2526       simde_x_mm_setone_pd() },
2527     { simde_mm_set_pd(SIMDE_FLOAT64_C(  729.13), SIMDE_FLOAT64_C(  771.13)),
2528       simde_mm_set_pd(SIMDE_FLOAT64_C(  902.43), SIMDE_FLOAT64_C( -416.43)),
2529       simde_x_mm_setone_pd() }
2530   };
2531 
2532   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2533     simde__m128d r = simde_mm_cmpord_pd(test_vec[i].a, test_vec[i].b);
2534     simde_assert_m128d_equal(r, test_vec[i].r);
2535   }
2536 
2537   return 0;
2538 }
2539 
2540 static int
test_simde_mm_cmpord_sd(SIMDE_MUNIT_TEST_ARGS)2541 test_simde_mm_cmpord_sd(SIMDE_MUNIT_TEST_ARGS) {
2542   const struct {
2543     simde__m128d a;
2544     simde__m128d b;
2545     simde__m128d r;
2546   } test_vec[] = {
2547     { simde_mm_set_pd(SIMDE_FLOAT64_C(  602.71), SIMDE_FLOAT64_C( -732.62)),
2548       simde_mm_set_pd(SIMDE_FLOAT64_C(  116.21), SIMDE_FLOAT64_C( -560.07)),
2549       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  602.71), SIMDE_FLOAT64_C( -732.62)), simde_x_mm_setone_pd()) },
2550     { simde_mm_set_pd(SIMDE_FLOAT64_C(  947.95), SIMDE_MATH_NAN),
2551       simde_mm_set_pd(SIMDE_FLOAT64_C(  -66.03), SIMDE_FLOAT64_C(  -86.78)),
2552       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  947.95), SIMDE_FLOAT64_C(  775.29)), simde_mm_setzero_pd()) },
2553     { simde_mm_set_pd(SIMDE_FLOAT64_C( -455.06), SIMDE_FLOAT64_C(  579.65)),
2554       simde_mm_set_pd(SIMDE_FLOAT64_C( -960.88), SIMDE_MATH_NAN),
2555       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -455.06), SIMDE_FLOAT64_C(  579.65)), simde_mm_setzero_pd()) },
2556     { simde_mm_set_pd(SIMDE_FLOAT64_C(  782.89), SIMDE_MATH_NAN),
2557       simde_mm_set_pd(SIMDE_FLOAT64_C( -540.96), SIMDE_MATH_NAN),
2558       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  782.89), SIMDE_FLOAT64_C( -266.22)), simde_mm_setzero_pd()) },
2559     { simde_mm_set_pd(SIMDE_FLOAT64_C( -750.41), SIMDE_FLOAT64_C( -624.09)),
2560       simde_mm_set_pd(SIMDE_FLOAT64_C( -599.13), SIMDE_FLOAT64_C(  704.00)),
2561       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -750.41), SIMDE_FLOAT64_C( -624.09)), simde_x_mm_setone_pd()) },
2562     { simde_mm_set_pd(SIMDE_FLOAT64_C(  149.22), SIMDE_FLOAT64_C( -876.24)),
2563       simde_mm_set_pd(SIMDE_FLOAT64_C(  871.40), SIMDE_FLOAT64_C(  321.55)),
2564       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  149.22), SIMDE_FLOAT64_C( -876.24)), simde_x_mm_setone_pd()) },
2565     { simde_mm_set_pd(SIMDE_FLOAT64_C( -822.79), SIMDE_FLOAT64_C(  890.31)),
2566       simde_mm_set_pd(SIMDE_FLOAT64_C( -260.78), SIMDE_FLOAT64_C(  386.76)),
2567       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -822.79), SIMDE_FLOAT64_C(  890.31)), simde_x_mm_setone_pd()) },
2568     { simde_mm_set_pd(SIMDE_FLOAT64_C( -370.89), SIMDE_FLOAT64_C( -622.25)),
2569       simde_mm_set_pd(SIMDE_FLOAT64_C(  587.16), SIMDE_FLOAT64_C( -811.86)),
2570       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -370.89), SIMDE_FLOAT64_C( -622.25)), simde_x_mm_setone_pd()) }
2571   };
2572 
2573   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2574     simde__m128d r = simde_mm_cmpord_sd(test_vec[i].a, test_vec[i].b);
2575     simde_assert_m128d_equal(r, test_vec[i].r);
2576   }
2577 
2578   return 0;
2579 }
2580 
2581 static int
test_simde_mm_cmpunord_pd(SIMDE_MUNIT_TEST_ARGS)2582 test_simde_mm_cmpunord_pd(SIMDE_MUNIT_TEST_ARGS) {
2583   const struct {
2584     simde__m128d a;
2585     simde__m128d b;
2586     simde__m128d r;
2587   } test_vec[8] = {
2588     { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  670.49)),
2589       simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  826.75)),
2590       simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2591     { simde_mm_set_pd(SIMDE_FLOAT64_C( -774.95), SIMDE_MATH_NAN),
2592       simde_mm_set_pd(SIMDE_FLOAT64_C(  247.71), SIMDE_MATH_NAN),
2593       simde_mm_move_sd(simde_mm_setzero_pd(), simde_x_mm_setone_pd()) },
2594     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -88.77), SIMDE_FLOAT64_C(  116.09)),
2595       simde_mm_set_pd(SIMDE_FLOAT64_C(  -32.79), SIMDE_FLOAT64_C( -442.07)),
2596       simde_mm_setzero_pd() },
2597     { simde_mm_set_pd(SIMDE_FLOAT64_C(   71.71), SIMDE_FLOAT64_C(  549.42)),
2598       simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_FLOAT64_C( -288.27)),
2599       simde_mm_move_sd(simde_x_mm_setone_pd(), simde_mm_setzero_pd()) },
2600     { simde_mm_set_pd(SIMDE_FLOAT64_C( -266.24), SIMDE_FLOAT64_C( -147.24)),
2601       simde_mm_set_pd(SIMDE_FLOAT64_C(  900.46), SIMDE_FLOAT64_C( -288.71)),
2602       simde_mm_setzero_pd() },
2603     { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_MATH_NAN),
2604       simde_mm_set_pd(SIMDE_FLOAT64_C(  196.30), SIMDE_MATH_NAN),
2605       simde_x_mm_setone_pd() },
2606     { simde_mm_set_pd(SIMDE_FLOAT64_C( -447.36), SIMDE_FLOAT64_C(  236.69)),
2607       simde_mm_set_pd(SIMDE_FLOAT64_C( -774.85), SIMDE_FLOAT64_C( -611.68)),
2608       simde_mm_setzero_pd() },
2609     { simde_mm_set_pd(SIMDE_MATH_NAN, SIMDE_MATH_NAN),
2610       simde_mm_set_pd(SIMDE_FLOAT64_C(  711.66), SIMDE_FLOAT64_C( -751.40)),
2611       simde_x_mm_setone_pd() }
2612   };
2613 
2614   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2615     simde__m128d r = simde_mm_cmpunord_pd(test_vec[i].a, test_vec[i].b);
2616     simde_assert_m128i_u64(simde_mm_castpd_si128(r), ==, simde_mm_castpd_si128(test_vec[i].r));
2617   }
2618 
2619   return 0;
2620 }
2621 
2622 static int
test_simde_mm_cmpunord_sd(SIMDE_MUNIT_TEST_ARGS)2623 test_simde_mm_cmpunord_sd(SIMDE_MUNIT_TEST_ARGS) {
2624   const struct {
2625     simde__m128d a;
2626     simde__m128d b;
2627     simde__m128d r;
2628   } test_vec[8] = {
2629     { simde_mm_set_pd(SIMDE_FLOAT64_C( -580.90), SIMDE_FLOAT64_C(  757.46)),
2630       simde_mm_set_pd(SIMDE_FLOAT64_C( -779.63), SIMDE_FLOAT64_C(   96.79)),
2631       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -580.90), SIMDE_FLOAT64_C(  757.46)), simde_mm_setzero_pd()) },
2632     { simde_mm_set_pd(SIMDE_FLOAT64_C( -607.89), SIMDE_MATH_NAN),
2633       simde_mm_set_pd(SIMDE_FLOAT64_C(  751.46), SIMDE_FLOAT64_C(  753.64)),
2634       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -607.89), SIMDE_FLOAT64_C( -882.75)), simde_x_mm_setone_pd()) },
2635     { simde_mm_set_pd(SIMDE_FLOAT64_C(  177.62), SIMDE_FLOAT64_C( -618.39)),
2636       simde_mm_set_pd(SIMDE_FLOAT64_C( -958.41), SIMDE_MATH_NAN),
2637       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  177.62), SIMDE_FLOAT64_C( -618.39)), simde_x_mm_setone_pd()) },
2638     { simde_mm_set_pd(SIMDE_FLOAT64_C(  910.88), SIMDE_MATH_NAN),
2639       simde_mm_set_pd(SIMDE_FLOAT64_C( -924.01), SIMDE_MATH_NAN),
2640       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  910.88), SIMDE_FLOAT64_C(  313.76)), simde_x_mm_setone_pd()) },
2641     { simde_mm_set_pd(SIMDE_FLOAT64_C( -778.12), SIMDE_FLOAT64_C( -472.40)),
2642       simde_mm_set_pd(SIMDE_FLOAT64_C(  400.92), SIMDE_FLOAT64_C( -453.41)),
2643       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -778.12), SIMDE_FLOAT64_C( -472.40)), simde_mm_setzero_pd()) },
2644     { simde_mm_set_pd(SIMDE_FLOAT64_C(  933.47), SIMDE_FLOAT64_C( -426.60)),
2645       simde_mm_set_pd(SIMDE_FLOAT64_C(  836.37), SIMDE_FLOAT64_C(  329.66)),
2646       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C(  933.47), SIMDE_FLOAT64_C( -426.60)), simde_mm_setzero_pd()) },
2647     { simde_mm_set_pd(SIMDE_FLOAT64_C( -563.76), SIMDE_FLOAT64_C(  455.35)),
2648       simde_mm_set_pd(SIMDE_FLOAT64_C( -169.32), SIMDE_FLOAT64_C( -459.10)),
2649       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -563.76), SIMDE_FLOAT64_C(  455.35)), simde_mm_setzero_pd()) },
2650     { simde_mm_set_pd(SIMDE_FLOAT64_C( -117.48), SIMDE_FLOAT64_C( -934.82)),
2651       simde_mm_set_pd(SIMDE_FLOAT64_C(  177.09), SIMDE_FLOAT64_C(  194.89)),
2652       simde_mm_move_sd(simde_mm_set_pd(SIMDE_FLOAT64_C( -117.48), SIMDE_FLOAT64_C( -934.82)), simde_mm_setzero_pd()) }
2653   };
2654 
2655   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2656     simde__m128d r = simde_mm_cmpunord_sd(test_vec[i].a, test_vec[i].b);
2657     simde_assert_m128d_equal(r, test_vec[i].r);
2658   }
2659 
2660   return 0;
2661 }
2662 
2663 static int
test_simde_mm_comieq_sd(SIMDE_MUNIT_TEST_ARGS)2664 test_simde_mm_comieq_sd(SIMDE_MUNIT_TEST_ARGS) {
2665   const struct {
2666     simde__m128d a;
2667     simde__m128d b;
2668     int r;
2669   } test_vec[8] = {
2670     { simde_mm_set_pd(SIMDE_FLOAT64_C( -969.61), SIMDE_FLOAT64_C(  839.23)),
2671       simde_mm_set_pd(SIMDE_FLOAT64_C( -969.61), SIMDE_FLOAT64_C( -432.69)),
2672       0 },
2673     { simde_mm_set_pd(SIMDE_FLOAT64_C(  281.50), SIMDE_FLOAT64_C( -752.55)),
2674       simde_mm_set_pd(SIMDE_FLOAT64_C(  281.50), SIMDE_FLOAT64_C( -752.55)),
2675       1 },
2676     { simde_mm_set_pd(SIMDE_FLOAT64_C(  590.09), SIMDE_FLOAT64_C(  270.42)),
2677       simde_mm_set_pd(SIMDE_FLOAT64_C( -206.33), SIMDE_FLOAT64_C(  270.42)),
2678       1 },
2679     { simde_mm_set_pd(SIMDE_FLOAT64_C(  412.70), SIMDE_FLOAT64_C( -500.58)),
2680       simde_mm_set_pd(SIMDE_FLOAT64_C(  145.06), SIMDE_FLOAT64_C(  763.45)),
2681       0 },
2682     { simde_mm_set_pd(SIMDE_FLOAT64_C( -374.23), SIMDE_FLOAT64_C(  380.82)),
2683       simde_mm_set_pd(SIMDE_FLOAT64_C( -374.23), SIMDE_FLOAT64_C(  380.82)),
2684       1 },
2685     { simde_mm_set_pd(SIMDE_FLOAT64_C( -875.64), SIMDE_FLOAT64_C(   30.13)),
2686       simde_mm_set_pd(SIMDE_FLOAT64_C( -823.83), SIMDE_FLOAT64_C(   30.13)),
2687       1 },
2688     { simde_mm_set_pd(SIMDE_FLOAT64_C(  890.78), SIMDE_FLOAT64_C( -652.66)),
2689       simde_mm_set_pd(SIMDE_FLOAT64_C(  719.69), SIMDE_FLOAT64_C( -685.53)),
2690       0 },
2691     { simde_mm_set_pd(SIMDE_FLOAT64_C(  740.88), SIMDE_FLOAT64_C(  116.37)),
2692       simde_mm_set_pd(SIMDE_FLOAT64_C( -528.65), SIMDE_FLOAT64_C(  536.46)),
2693       0 }
2694   };
2695 
2696   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2697     int r = simde_mm_comieq_sd(test_vec[i].a, test_vec[i].b);
2698     simde_assert_equal_i(r, test_vec[i].r);
2699   }
2700 
2701   return 0;
2702 }
2703 
2704 static int
test_simde_mm_comige_sd(SIMDE_MUNIT_TEST_ARGS)2705 test_simde_mm_comige_sd(SIMDE_MUNIT_TEST_ARGS) {
2706   const struct {
2707     simde__m128d a;
2708     simde__m128d b;
2709     int r;
2710   } test_vec[8] = {
2711     { simde_mm_set_pd(SIMDE_FLOAT64_C(  105.66), SIMDE_FLOAT64_C(  552.43)),
2712       simde_mm_set_pd(SIMDE_FLOAT64_C(  105.66), SIMDE_FLOAT64_C(  267.88)),
2713       1 },
2714     { simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( -921.70)),
2715       simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C(  330.81)),
2716       0 },
2717     { simde_mm_set_pd(SIMDE_FLOAT64_C(  865.75), SIMDE_FLOAT64_C( -938.03)),
2718       simde_mm_set_pd(SIMDE_FLOAT64_C(  865.75), SIMDE_FLOAT64_C(  970.01)),
2719       0 },
2720     { simde_mm_set_pd(SIMDE_FLOAT64_C( -158.01), SIMDE_FLOAT64_C(  635.18)),
2721       simde_mm_set_pd(SIMDE_FLOAT64_C( -394.88), SIMDE_FLOAT64_C(  -19.73)),
2722       1 },
2723     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -29.19), SIMDE_FLOAT64_C( -429.43)),
2724       simde_mm_set_pd(SIMDE_FLOAT64_C(  -29.19), SIMDE_FLOAT64_C(  -32.37)),
2725       0 },
2726     { simde_mm_set_pd(SIMDE_FLOAT64_C(  507.45), SIMDE_FLOAT64_C( -241.62)),
2727       simde_mm_set_pd(SIMDE_FLOAT64_C(  507.45), SIMDE_FLOAT64_C(  500.55)),
2728       0 },
2729     { simde_mm_set_pd(SIMDE_FLOAT64_C( -667.19), SIMDE_FLOAT64_C(  338.98)),
2730       simde_mm_set_pd(SIMDE_FLOAT64_C(  225.94), SIMDE_FLOAT64_C(  338.98)),
2731       1 },
2732     { simde_mm_set_pd(SIMDE_FLOAT64_C(  890.13), SIMDE_FLOAT64_C( -203.09)),
2733       simde_mm_set_pd(SIMDE_FLOAT64_C( -221.49), SIMDE_FLOAT64_C(  304.99)),
2734       0 }
2735   };
2736 
2737   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2738     int r = simde_mm_comige_sd(test_vec[i].a, test_vec[i].b);
2739     simde_assert_equal_i(r, test_vec[i].r);
2740   }
2741 
2742   return 0;
2743 }
2744 
2745 static int
test_simde_mm_comigt_sd(SIMDE_MUNIT_TEST_ARGS)2746 test_simde_mm_comigt_sd(SIMDE_MUNIT_TEST_ARGS) {
2747   const struct {
2748     simde__m128d a;
2749     simde__m128d b;
2750     int r;
2751   } test_vec[8] = {
2752     { simde_mm_set_pd(SIMDE_FLOAT64_C(  105.66), SIMDE_FLOAT64_C(  552.43)),
2753       simde_mm_set_pd(SIMDE_FLOAT64_C(  105.66), SIMDE_FLOAT64_C(  267.88)),
2754       1 },
2755     { simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( -921.70)),
2756       simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C(  330.81)),
2757       0 },
2758     { simde_mm_set_pd(SIMDE_FLOAT64_C(  865.75), SIMDE_FLOAT64_C( -938.03)),
2759       simde_mm_set_pd(SIMDE_FLOAT64_C(  865.75), SIMDE_FLOAT64_C(  970.01)),
2760       0 },
2761     { simde_mm_set_pd(SIMDE_FLOAT64_C( -158.01), SIMDE_FLOAT64_C(  635.18)),
2762       simde_mm_set_pd(SIMDE_FLOAT64_C( -394.88), SIMDE_FLOAT64_C(  -19.73)),
2763       1 },
2764     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -29.19), SIMDE_FLOAT64_C( -429.43)),
2765       simde_mm_set_pd(SIMDE_FLOAT64_C(  -29.19), SIMDE_FLOAT64_C(  -32.37)),
2766       0 },
2767     { simde_mm_set_pd(SIMDE_FLOAT64_C(  507.45), SIMDE_FLOAT64_C( -241.62)),
2768       simde_mm_set_pd(SIMDE_FLOAT64_C(  507.45), SIMDE_FLOAT64_C(  500.55)),
2769       0 },
2770     { simde_mm_set_pd(SIMDE_FLOAT64_C( -667.19), SIMDE_FLOAT64_C(  338.98)),
2771       simde_mm_set_pd(SIMDE_FLOAT64_C(  225.94), SIMDE_FLOAT64_C(  338.98)),
2772       0 },
2773     { simde_mm_set_pd(SIMDE_FLOAT64_C(  890.13), SIMDE_FLOAT64_C( -203.09)),
2774       simde_mm_set_pd(SIMDE_FLOAT64_C( -221.49), SIMDE_FLOAT64_C(  304.99)),
2775       0 }
2776   };
2777 
2778   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2779     int r = simde_mm_comigt_sd(test_vec[i].a, test_vec[i].b);
2780     simde_assert_equal_i(r, test_vec[i].r);
2781   }
2782 
2783   return 0;
2784 }
2785 
2786 static int
test_simde_mm_comile_sd(SIMDE_MUNIT_TEST_ARGS)2787 test_simde_mm_comile_sd(SIMDE_MUNIT_TEST_ARGS) {
2788   const struct {
2789     simde__m128d a;
2790     simde__m128d b;
2791     int r;
2792   } test_vec[8] = {
2793     { simde_mm_set_pd(SIMDE_FLOAT64_C(  105.66), SIMDE_FLOAT64_C(  552.43)),
2794       simde_mm_set_pd(SIMDE_FLOAT64_C(  105.66), SIMDE_FLOAT64_C(  267.88)),
2795       0 },
2796     { simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( -921.70)),
2797       simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C(  330.81)),
2798       1 },
2799     { simde_mm_set_pd(SIMDE_FLOAT64_C(  865.75), SIMDE_FLOAT64_C( -938.03)),
2800       simde_mm_set_pd(SIMDE_FLOAT64_C(  865.75), SIMDE_FLOAT64_C(  970.01)),
2801       1 },
2802     { simde_mm_set_pd(SIMDE_FLOAT64_C( -158.01), SIMDE_FLOAT64_C(  635.18)),
2803       simde_mm_set_pd(SIMDE_FLOAT64_C( -394.88), SIMDE_FLOAT64_C(  -19.73)),
2804       0 },
2805     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -29.19), SIMDE_FLOAT64_C( -429.43)),
2806       simde_mm_set_pd(SIMDE_FLOAT64_C(  -29.19), SIMDE_FLOAT64_C(  -32.37)),
2807       1 },
2808     { simde_mm_set_pd(SIMDE_FLOAT64_C(  507.45), SIMDE_FLOAT64_C( -241.62)),
2809       simde_mm_set_pd(SIMDE_FLOAT64_C(  507.45), SIMDE_FLOAT64_C(  500.55)),
2810       1 },
2811     { simde_mm_set_pd(SIMDE_FLOAT64_C( -667.19), SIMDE_FLOAT64_C(  338.98)),
2812       simde_mm_set_pd(SIMDE_FLOAT64_C(  225.94), SIMDE_FLOAT64_C(  338.98)),
2813       1 },
2814     { simde_mm_set_pd(SIMDE_FLOAT64_C(  890.13), SIMDE_FLOAT64_C( -203.09)),
2815       simde_mm_set_pd(SIMDE_FLOAT64_C( -221.49), SIMDE_FLOAT64_C(  304.99)),
2816       1 }
2817   };
2818 
2819   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2820     int r = simde_mm_comile_sd(test_vec[i].a, test_vec[i].b);
2821     simde_assert_equal_i(r, test_vec[i].r);
2822   }
2823 
2824   return 0;
2825 }
2826 
2827 static int
test_simde_mm_comilt_sd(SIMDE_MUNIT_TEST_ARGS)2828 test_simde_mm_comilt_sd(SIMDE_MUNIT_TEST_ARGS) {
2829   const struct {
2830     simde__m128d a;
2831     simde__m128d b;
2832     int r;
2833   } test_vec[8] = {
2834     { simde_mm_set_pd(SIMDE_FLOAT64_C(  766.54), SIMDE_FLOAT64_C(  -69.58)),
2835       simde_mm_set_pd(SIMDE_FLOAT64_C(  185.38), SIMDE_FLOAT64_C(  -69.58)),
2836       0 },
2837     { simde_mm_set_pd(SIMDE_FLOAT64_C(  489.65), SIMDE_FLOAT64_C(  372.98)),
2838       simde_mm_set_pd(SIMDE_FLOAT64_C(  489.65), SIMDE_FLOAT64_C(  372.98)),
2839       0 },
2840     { simde_mm_set_pd(SIMDE_FLOAT64_C(   79.48), SIMDE_FLOAT64_C( -168.45)),
2841       simde_mm_set_pd(SIMDE_FLOAT64_C( -648.03), SIMDE_FLOAT64_C( -710.04)),
2842       0 },
2843     { simde_mm_set_pd(SIMDE_FLOAT64_C(  907.60), SIMDE_FLOAT64_C(  955.73)),
2844       simde_mm_set_pd(SIMDE_FLOAT64_C(  907.60), SIMDE_FLOAT64_C( -965.39)),
2845       0 },
2846     { simde_mm_set_pd(SIMDE_FLOAT64_C( -237.33), SIMDE_FLOAT64_C(  558.83)),
2847       simde_mm_set_pd(SIMDE_FLOAT64_C(  415.12), SIMDE_FLOAT64_C(  558.83)),
2848       0 },
2849     { simde_mm_set_pd(SIMDE_FLOAT64_C( -796.13), SIMDE_FLOAT64_C(   18.69)),
2850       simde_mm_set_pd(SIMDE_FLOAT64_C( -796.13), SIMDE_FLOAT64_C(   18.69)),
2851       0 },
2852     { simde_mm_set_pd(SIMDE_FLOAT64_C( -380.36), SIMDE_FLOAT64_C( -737.73)),
2853       simde_mm_set_pd(SIMDE_FLOAT64_C( -380.36), SIMDE_FLOAT64_C( -737.73)),
2854       0 },
2855     { simde_mm_set_pd(SIMDE_FLOAT64_C( -975.15), SIMDE_FLOAT64_C( -296.93)),
2856       simde_mm_set_pd(SIMDE_FLOAT64_C( -975.15), SIMDE_FLOAT64_C( -296.93)),
2857       0 }
2858   };
2859 
2860   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2861     int r = simde_mm_comilt_sd(test_vec[i].a, test_vec[i].b);
2862     simde_assert_equal_i(r, test_vec[i].r);
2863   }
2864 
2865   return 0;
2866 }
2867 
2868 static int
test_simde_mm_comineq_sd(SIMDE_MUNIT_TEST_ARGS)2869 test_simde_mm_comineq_sd(SIMDE_MUNIT_TEST_ARGS) {
2870   const struct {
2871     simde__m128d a;
2872     simde__m128d b;
2873     int r;
2874   } test_vec[8] = {
2875     { simde_mm_set_pd(SIMDE_FLOAT64_C(  105.66), SIMDE_FLOAT64_C(  552.43)),
2876       simde_mm_set_pd(SIMDE_FLOAT64_C(  105.66), SIMDE_FLOAT64_C(  267.88)),
2877       1 },
2878     { simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C( -921.70)),
2879       simde_mm_set_pd(SIMDE_FLOAT64_C( -590.31), SIMDE_FLOAT64_C(  330.81)),
2880       1 },
2881     { simde_mm_set_pd(SIMDE_FLOAT64_C(  865.75), SIMDE_FLOAT64_C( -938.03)),
2882       simde_mm_set_pd(SIMDE_FLOAT64_C(  865.75), SIMDE_FLOAT64_C(  970.01)),
2883       1 },
2884     { simde_mm_set_pd(SIMDE_FLOAT64_C( -158.01), SIMDE_FLOAT64_C(  635.18)),
2885       simde_mm_set_pd(SIMDE_FLOAT64_C( -394.88), SIMDE_FLOAT64_C(  -19.73)),
2886       1 },
2887     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -29.19), SIMDE_FLOAT64_C( -429.43)),
2888       simde_mm_set_pd(SIMDE_FLOAT64_C(  -29.19), SIMDE_FLOAT64_C(  -32.37)),
2889       1 },
2890     { simde_mm_set_pd(SIMDE_FLOAT64_C(  507.45), SIMDE_FLOAT64_C( -241.62)),
2891       simde_mm_set_pd(SIMDE_FLOAT64_C(  507.45), SIMDE_FLOAT64_C(  500.55)),
2892       1 },
2893     { simde_mm_set_pd(SIMDE_FLOAT64_C( -667.19), SIMDE_FLOAT64_C(  338.98)),
2894       simde_mm_set_pd(SIMDE_FLOAT64_C(  225.94), SIMDE_FLOAT64_C(  338.98)),
2895       0 },
2896     { simde_mm_set_pd(SIMDE_FLOAT64_C(  890.13), SIMDE_FLOAT64_C( -203.09)),
2897       simde_mm_set_pd(SIMDE_FLOAT64_C( -221.49), SIMDE_FLOAT64_C(  304.99)),
2898       1 }
2899   };
2900 
2901   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
2902     int r = simde_mm_comineq_sd(test_vec[i].a, test_vec[i].b);
2903     simde_assert_equal_i(r, test_vec[i].r);
2904   }
2905 
2906   return 0;
2907 }
2908 
2909 static int
test_simde_x_mm_copysign_pd(SIMDE_MUNIT_TEST_ARGS)2910 test_simde_x_mm_copysign_pd (SIMDE_MUNIT_TEST_ARGS) {
2911   static const struct {
2912     const simde_float64 dest[2];
2913     const simde_float64 src[2];
2914     const simde_float64 r[2];
2915   } test_vec[] = {
2916     { { SIMDE_FLOAT64_C(  -182.01), SIMDE_FLOAT64_C(   309.30) },
2917       { SIMDE_FLOAT64_C(  -125.98), SIMDE_FLOAT64_C(  -334.42) },
2918       { SIMDE_FLOAT64_C(  -182.01), SIMDE_FLOAT64_C(  -309.30) } },
2919     { { SIMDE_FLOAT64_C(  -339.97), SIMDE_FLOAT64_C(  -147.14) },
2920       { SIMDE_FLOAT64_C(   534.39), SIMDE_FLOAT64_C(  -377.91) },
2921       { SIMDE_FLOAT64_C(   339.97), SIMDE_FLOAT64_C(  -147.14) } },
2922     { { SIMDE_FLOAT64_C(  -466.30), SIMDE_FLOAT64_C(   794.64) },
2923       { SIMDE_FLOAT64_C(   936.51), SIMDE_FLOAT64_C(  -627.08) },
2924       { SIMDE_FLOAT64_C(   466.30), SIMDE_FLOAT64_C(  -794.64) } },
2925     { { SIMDE_FLOAT64_C(   644.80), SIMDE_FLOAT64_C(   412.58) },
2926       { SIMDE_FLOAT64_C(  -738.56), SIMDE_FLOAT64_C(  -987.18) },
2927       { SIMDE_FLOAT64_C(  -644.80), SIMDE_FLOAT64_C(  -412.58) } },
2928     { { SIMDE_FLOAT64_C(   -54.12), SIMDE_FLOAT64_C(  -858.45) },
2929       { SIMDE_FLOAT64_C(  -554.31), SIMDE_FLOAT64_C(   274.31) },
2930       { SIMDE_FLOAT64_C(   -54.12), SIMDE_FLOAT64_C(   858.45) } },
2931     { { SIMDE_FLOAT64_C(  -106.06), SIMDE_FLOAT64_C(  -482.09) },
2932       { SIMDE_FLOAT64_C(  -505.26), SIMDE_FLOAT64_C(  -310.15) },
2933       { SIMDE_FLOAT64_C(  -106.06), SIMDE_FLOAT64_C(  -482.09) } },
2934     { { SIMDE_FLOAT64_C(   726.18), SIMDE_FLOAT64_C(   941.28) },
2935       { SIMDE_FLOAT64_C(  -987.65), SIMDE_FLOAT64_C(  -463.18) },
2936       { SIMDE_FLOAT64_C(  -726.18), SIMDE_FLOAT64_C(  -941.28) } },
2937     { { SIMDE_FLOAT64_C(  -907.04), SIMDE_FLOAT64_C(  -842.82) },
2938       { SIMDE_FLOAT64_C(  -124.70), SIMDE_FLOAT64_C(   -89.06) },
2939       { SIMDE_FLOAT64_C(  -907.04), SIMDE_FLOAT64_C(  -842.82) } }
2940   };
2941 
2942   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
2943     simde__m128d dest = simde_mm_loadu_pd(test_vec[i].dest);
2944     simde__m128d src = simde_mm_loadu_pd(test_vec[i].src);
2945     simde__m128d r = simde_x_mm_copysign_pd(dest, src);
2946     simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1);
2947   }
2948 
2949   return 0;
2950 }
2951 
2952 static int
test_simde_x_mm_xorsign_pd(SIMDE_MUNIT_TEST_ARGS)2953 test_simde_x_mm_xorsign_pd (SIMDE_MUNIT_TEST_ARGS) {
2954   static const struct {
2955     const simde_float64 a[2];
2956     const simde_float64 b[2];
2957     const simde_float64 r[2];
2958   } test_vec[] = {
2959     { { SIMDE_FLOAT64_C(  -998.44), SIMDE_FLOAT64_C(  -179.45) },
2960       { SIMDE_FLOAT64_C(    34.66), SIMDE_FLOAT64_C(   254.98) },
2961       { SIMDE_FLOAT64_C(  -998.44), SIMDE_FLOAT64_C(  -179.45) } },
2962     { { SIMDE_FLOAT64_C(  -220.74), SIMDE_FLOAT64_C(   718.77) },
2963       { SIMDE_FLOAT64_C(  -648.69), SIMDE_FLOAT64_C(  -598.91) },
2964       { SIMDE_FLOAT64_C(   220.74), SIMDE_FLOAT64_C(  -718.77) } },
2965     { { SIMDE_FLOAT64_C(    84.66), SIMDE_FLOAT64_C(  -602.04) },
2966       { SIMDE_FLOAT64_C(   631.55), SIMDE_FLOAT64_C(  -486.59) },
2967       { SIMDE_FLOAT64_C(    84.66), SIMDE_FLOAT64_C(   602.04) } },
2968     { { SIMDE_FLOAT64_C(   570.81), SIMDE_FLOAT64_C(   368.00) },
2969       { SIMDE_FLOAT64_C(   372.19), SIMDE_FLOAT64_C(  -832.84) },
2970       { SIMDE_FLOAT64_C(   570.81), SIMDE_FLOAT64_C(  -368.00) } },
2971     { { SIMDE_FLOAT64_C(  -996.05), SIMDE_FLOAT64_C(   875.71) },
2972       { SIMDE_FLOAT64_C(   198.29), SIMDE_FLOAT64_C(  -187.87) },
2973       { SIMDE_FLOAT64_C(  -996.05), SIMDE_FLOAT64_C(  -875.71) } },
2974     { { SIMDE_FLOAT64_C(  -462.20), SIMDE_FLOAT64_C(  -277.60) },
2975       { SIMDE_FLOAT64_C(   841.75), SIMDE_FLOAT64_C(   127.22) },
2976       { SIMDE_FLOAT64_C(  -462.20), SIMDE_FLOAT64_C(  -277.60) } },
2977     { { SIMDE_FLOAT64_C(  -669.20), SIMDE_FLOAT64_C(  -206.42) },
2978       { SIMDE_FLOAT64_C(   600.14), SIMDE_FLOAT64_C(    65.01) },
2979       { SIMDE_FLOAT64_C(  -669.20), SIMDE_FLOAT64_C(  -206.42) } },
2980     { { SIMDE_FLOAT64_C(   159.77), SIMDE_FLOAT64_C(  -896.78) },
2981       { SIMDE_FLOAT64_C(   642.72), SIMDE_FLOAT64_C(   161.33) },
2982       { SIMDE_FLOAT64_C(   159.77), SIMDE_FLOAT64_C(  -896.78) } }
2983   };
2984 
2985   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
2986     simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
2987     simde__m128d b = simde_mm_loadu_pd(test_vec[i].b);
2988     simde__m128d r = simde_x_mm_xorsign_pd(a, b);
2989     simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1);
2990   }
2991 
2992   return 0;
2993 }
2994 
2995 static int
test_simde_mm_cvtepi32_pd(SIMDE_MUNIT_TEST_ARGS)2996 test_simde_mm_cvtepi32_pd(SIMDE_MUNIT_TEST_ARGS) {
2997   const struct {
2998     simde__m128i a;
2999     simde__m128d r;
3000   } test_vec[8] = {
3001     { simde_mm_set_epi32( 1668601445,     8850426,  1726684816, -1842005323),
3002       simde_mm_set_pd(SIMDE_FLOAT64_C(1726684816.00), SIMDE_FLOAT64_C(-1842005323.00)) },
3003     { simde_mm_set_epi32(-1162443511,  1098837378,  -970075414,  1210551220),
3004       simde_mm_set_pd(SIMDE_FLOAT64_C(-970075414.00), SIMDE_FLOAT64_C(1210551220.00)) },
3005     { simde_mm_set_epi32( 1014915875,   235168560,   691866984,  -431325465),
3006       simde_mm_set_pd(SIMDE_FLOAT64_C(691866984.00), SIMDE_FLOAT64_C(-431325465.00)) },
3007     { simde_mm_set_epi32( 1621419008,  1286931249, -1424446000,  -169673917),
3008       simde_mm_set_pd(SIMDE_FLOAT64_C(-1424446000.00), SIMDE_FLOAT64_C(-169673917.00)) },
3009     { simde_mm_set_epi32(  982570498,    31161721,   410129833,  1249524705),
3010       simde_mm_set_pd(SIMDE_FLOAT64_C(410129833.00), SIMDE_FLOAT64_C(1249524705.00)) },
3011     { simde_mm_set_epi32(-1807976526,   584564543,  1386856775,  -792093051),
3012       simde_mm_set_pd(SIMDE_FLOAT64_C(1386856775.00), SIMDE_FLOAT64_C(-792093051.00)) },
3013     { simde_mm_set_epi32( 1927957259,   324939853,  1056227907,   960202603),
3014       simde_mm_set_pd(SIMDE_FLOAT64_C(1056227907.00), SIMDE_FLOAT64_C(960202603.00)) },
3015     { simde_mm_set_epi32( 2096858414,  2117774841,   250894175,  1268045519),
3016       simde_mm_set_pd(SIMDE_FLOAT64_C(250894175.00), SIMDE_FLOAT64_C(1268045519.00)) }
3017   };
3018 
3019   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3020     simde__m128d r = simde_mm_cvtepi32_pd(test_vec[i].a);
3021     simde_assert_m128d_close(r, test_vec[i].r, 1);
3022   }
3023 
3024   return 0;
3025 }
3026 
3027 static int
test_simde_mm_cvtepi32_ps(SIMDE_MUNIT_TEST_ARGS)3028 test_simde_mm_cvtepi32_ps(SIMDE_MUNIT_TEST_ARGS) {
3029   const struct {
3030     simde__m128i a;
3031     simde__m128 r;
3032   } test_vec[8] = {
3033     { simde_mm_set_epi32( 332, -787,  -79, -785),
3034       simde_mm_set_ps(SIMDE_FLOAT32_C( 332.00), SIMDE_FLOAT32_C(-787.00), SIMDE_FLOAT32_C( -79.00), SIMDE_FLOAT32_C(-785.00)) },
3035     { simde_mm_set_epi32( 394, -936, -733, -136),
3036       simde_mm_set_ps(SIMDE_FLOAT32_C( 394.00), SIMDE_FLOAT32_C(-936.00), SIMDE_FLOAT32_C(-733.00), SIMDE_FLOAT32_C(-136.00)) },
3037     { simde_mm_set_epi32( 618, -416,  310,  183),
3038       simde_mm_set_ps(SIMDE_FLOAT32_C( 618.00), SIMDE_FLOAT32_C(-416.00), SIMDE_FLOAT32_C( 310.00), SIMDE_FLOAT32_C( 183.00)) },
3039     { simde_mm_set_epi32(-748,  245,  533, -152),
3040       simde_mm_set_ps(SIMDE_FLOAT32_C(-748.00), SIMDE_FLOAT32_C( 245.00), SIMDE_FLOAT32_C( 533.00), SIMDE_FLOAT32_C(-152.00)) },
3041     { simde_mm_set_epi32(  42,  893,  849, -741),
3042       simde_mm_set_ps(SIMDE_FLOAT32_C(  42.00), SIMDE_FLOAT32_C( 893.00), SIMDE_FLOAT32_C( 849.00), SIMDE_FLOAT32_C(-741.00)) },
3043     { simde_mm_set_epi32( 657,  222, -709, -177),
3044       simde_mm_set_ps(SIMDE_FLOAT32_C( 657.00), SIMDE_FLOAT32_C( 222.00), SIMDE_FLOAT32_C(-709.00), SIMDE_FLOAT32_C(-177.00)) },
3045     { simde_mm_set_epi32( 762, -586,  196,  717),
3046       simde_mm_set_ps(SIMDE_FLOAT32_C( 762.00), SIMDE_FLOAT32_C(-586.00), SIMDE_FLOAT32_C( 196.00), SIMDE_FLOAT32_C( 717.00)) },
3047     { simde_mm_set_epi32( 322,  178,  766, -110),
3048       simde_mm_set_ps(SIMDE_FLOAT32_C( 322.00), SIMDE_FLOAT32_C( 178.00), SIMDE_FLOAT32_C( 766.00), SIMDE_FLOAT32_C(-110.00)) }
3049   };
3050 
3051   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3052     simde__m128 r = simde_mm_cvtepi32_ps(test_vec[i].a);
3053     simde_assert_m128_close(r, test_vec[i].r, 1);
3054   }
3055 
3056   return 0;
3057 }
3058 
3059 static int
test_simde_mm_cvtpd_epi32(SIMDE_MUNIT_TEST_ARGS)3060 test_simde_mm_cvtpd_epi32 (SIMDE_MUNIT_TEST_ARGS) {
3061   static const struct {
3062     const simde_float64 a[2];
3063     const int32_t r[4];
3064   } test_vec[] = {
3065     #if !defined(SIMDE_FAST_NANS)
3066     { {             SIMDE_MATH_NAN,            -SIMDE_MATH_NAN },
3067       {            INT32_MIN,             INT32_MIN,  INT32_C(           0),  INT32_C(           0) } },
3068     #endif
3069     #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3070     { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1), HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100) },
3071       {              INT32_MIN,  INT32_C(  2147483547),  INT32_C(           0),  INT32_C(           0) } },
3072     { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1), HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100 },
3073       {              INT32_MIN, -INT32_C(  2147483548),  INT32_C(           0),  INT32_C(           0) } },
3074     #endif
3075     { { SIMDE_FLOAT64_C(  -220.31), SIMDE_FLOAT64_C(   685.08) },
3076       { -INT32_C(         220),  INT32_C(         685),  INT32_C(           0),  INT32_C(           0) } },
3077     { { SIMDE_FLOAT64_C(  -164.88), SIMDE_FLOAT64_C(   725.51) },
3078       { -INT32_C(         165),  INT32_C(         726),  INT32_C(           0),  INT32_C(           0) } },
3079     { { SIMDE_FLOAT64_C(   152.74), SIMDE_FLOAT64_C(   778.03) },
3080       {  INT32_C(         153),  INT32_C(         778),  INT32_C(           0),  INT32_C(           0) } },
3081     { { SIMDE_FLOAT64_C(  -801.11), SIMDE_FLOAT64_C(  -331.66) },
3082       { -INT32_C(         801), -INT32_C(         332),  INT32_C(           0),  INT32_C(           0) } },
3083     { { SIMDE_FLOAT64_C(  -834.04), SIMDE_FLOAT64_C(   -51.56) },
3084       { -INT32_C(         834), -INT32_C(          52),  INT32_C(           0),  INT32_C(           0) } },
3085     { { SIMDE_FLOAT64_C(   737.22), SIMDE_FLOAT64_C(   205.77) },
3086       {  INT32_C(         737),  INT32_C(         206),  INT32_C(           0),  INT32_C(           0) } }
3087   };
3088 
3089   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3090     simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
3091     simde__m128i r = simde_mm_cvtpd_epi32(a);
3092     simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
3093   }
3094 
3095   return 0;
3096 }
3097 
3098 static int
test_simde_mm_cvtpd_pi32(SIMDE_MUNIT_TEST_ARGS)3099 test_simde_mm_cvtpd_pi32 (SIMDE_MUNIT_TEST_ARGS) {
3100   static const struct {
3101     const simde_float64 a[2];
3102     const int32_t r[2];
3103   } test_vec[] = {
3104     #if !defined(SIMDE_FAST_NANS)
3105     { {             SIMDE_MATH_NAN,            -SIMDE_MATH_NAN },
3106       {            INT32_MIN,             INT32_MIN } },
3107     #endif
3108     #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3109     { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1), HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100) },
3110       {              INT32_MIN,  INT32_C(  2147483547) } },
3111     { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1), HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100 },
3112       {              INT32_MIN, -INT32_C(  2147483548) } },
3113     #endif
3114     { { SIMDE_FLOAT64_C(  -220.31), SIMDE_FLOAT64_C(   685.08) },
3115       { -INT32_C(         220),  INT32_C(         685) } },
3116     { { SIMDE_FLOAT64_C(  -164.88), SIMDE_FLOAT64_C(   725.51) },
3117       { -INT32_C(         165),  INT32_C(         726) } },
3118     { { SIMDE_FLOAT64_C(   152.74), SIMDE_FLOAT64_C(   778.03) },
3119       {  INT32_C(         153),  INT32_C(         778) } },
3120     { { SIMDE_FLOAT64_C(  -801.11), SIMDE_FLOAT64_C(  -331.66) },
3121       { -INT32_C(         801), -INT32_C(         332) } },
3122     { { SIMDE_FLOAT64_C(  -834.04), SIMDE_FLOAT64_C(   -51.56) },
3123       { -INT32_C(         834), -INT32_C(          52) } },
3124     { { SIMDE_FLOAT64_C(   737.22), SIMDE_FLOAT64_C(   205.77) },
3125       {  INT32_C(         737),  INT32_C(         206) } }
3126   };
3127 
3128   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3129     simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
3130     simde__m64 r = simde_mm_cvtpd_pi32(a);
3131     simde_test_x86_assert_equal_i32x2(r, simde_x_mm_loadu_si64(test_vec[i].r));
3132   }
3133 
3134   return 0;
3135 }
3136 
3137 static int
test_simde_mm_cvtpd_ps(SIMDE_MUNIT_TEST_ARGS)3138 test_simde_mm_cvtpd_ps(SIMDE_MUNIT_TEST_ARGS) {
3139   const struct {
3140     simde__m128d a;
3141     simde__m128 r;
3142   } test_vec[8] = {
3143     { simde_mm_set_pd(SIMDE_FLOAT64_C(  655.71), SIMDE_FLOAT64_C(  689.41)),
3144       simde_mm_set_ps(SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(  655.71), SIMDE_FLOAT32_C(  689.41)) },
3145     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -50.10), SIMDE_FLOAT64_C( -149.72)),
3146       simde_mm_set_ps(SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(  -50.10), SIMDE_FLOAT32_C( -149.72)) },
3147     { simde_mm_set_pd(SIMDE_FLOAT64_C(  227.42), SIMDE_FLOAT64_C(  655.70)),
3148       simde_mm_set_ps(SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(  227.42), SIMDE_FLOAT32_C(  655.70)) },
3149     { simde_mm_set_pd(SIMDE_FLOAT64_C( -635.17), SIMDE_FLOAT64_C(  938.65)),
3150       simde_mm_set_ps(SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C( -635.17), SIMDE_FLOAT32_C(  938.65)) },
3151     { simde_mm_set_pd(SIMDE_FLOAT64_C(  548.99), SIMDE_FLOAT64_C(  -18.53)),
3152       simde_mm_set_ps(SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(  548.99), SIMDE_FLOAT32_C(  -18.53)) },
3153     { simde_mm_set_pd(SIMDE_FLOAT64_C( -548.71), SIMDE_FLOAT64_C(   31.33)),
3154       simde_mm_set_ps(SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C( -548.71), SIMDE_FLOAT32_C(   31.33)) },
3155     { simde_mm_set_pd(SIMDE_FLOAT64_C( -978.36), SIMDE_FLOAT64_C( -341.93)),
3156       simde_mm_set_ps(SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C( -978.36), SIMDE_FLOAT32_C( -341.93)) },
3157     { simde_mm_set_pd(SIMDE_FLOAT64_C(  211.73), SIMDE_FLOAT64_C(  471.24)),
3158       simde_mm_set_ps(SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(    0.00), SIMDE_FLOAT32_C(  211.73), SIMDE_FLOAT32_C(  471.24)) }
3159   };
3160 
3161   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3162     simde__m128 r = simde_mm_cvtpd_ps(test_vec[i].a);
3163     simde_assert_m128_close(r, test_vec[i].r, 1);
3164   }
3165 
3166   return 0;
3167 }
3168 
3169 static int
test_simde_mm_cvtpi32_pd(SIMDE_MUNIT_TEST_ARGS)3170 test_simde_mm_cvtpi32_pd(SIMDE_MUNIT_TEST_ARGS) {
3171   const struct {
3172     simde__m64 a;
3173     simde__m128d r;
3174   } test_vec[8] = {
3175     { simde_mm_set_pi32(  -42,  -579),
3176       simde_mm_set_pd(SIMDE_FLOAT64_C( -42.00), SIMDE_FLOAT64_C(-579.00)) },
3177     { simde_mm_set_pi32( -633,    29),
3178       simde_mm_set_pd(SIMDE_FLOAT64_C(-633.00), SIMDE_FLOAT64_C(  29.00)) },
3179     { simde_mm_set_pi32( -149,   196),
3180       simde_mm_set_pd(SIMDE_FLOAT64_C(-149.00), SIMDE_FLOAT64_C( 196.00)) },
3181     { simde_mm_set_pi32(  308,  -433),
3182       simde_mm_set_pd(SIMDE_FLOAT64_C( 308.00), SIMDE_FLOAT64_C(-433.00)) },
3183     { simde_mm_set_pi32( -881,   358),
3184       simde_mm_set_pd(SIMDE_FLOAT64_C(-881.00), SIMDE_FLOAT64_C( 358.00)) },
3185     { simde_mm_set_pi32(  723,   273),
3186       simde_mm_set_pd(SIMDE_FLOAT64_C( 723.00), SIMDE_FLOAT64_C( 273.00)) },
3187     { simde_mm_set_pi32( -182,   457),
3188       simde_mm_set_pd(SIMDE_FLOAT64_C(-182.00), SIMDE_FLOAT64_C( 457.00)) },
3189     { simde_mm_set_pi32( -239,  -577),
3190       simde_mm_set_pd(SIMDE_FLOAT64_C(-239.00), SIMDE_FLOAT64_C(-577.00)) }
3191   };
3192 
3193   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3194     simde__m128d r = simde_mm_cvtpi32_pd(test_vec[i].a);
3195     simde_assert_m128d_close(r, test_vec[i].r, 1);
3196   }
3197 
3198   return 0;
3199 }
3200 
3201 static int
test_simde_mm_cvtps_epi32(SIMDE_MUNIT_TEST_ARGS)3202 test_simde_mm_cvtps_epi32(SIMDE_MUNIT_TEST_ARGS) {
3203   static const struct {
3204     const simde_float32 a[4];
3205     const int32_t r[4];
3206   } test_vec[] = {
3207     #if !defined(SIMDE_FAST_NANS)
3208     { {            SIMDE_MATH_NANF,           -SIMDE_MATH_NANF, SIMDE_FLOAT32_C(   718.49), SIMDE_FLOAT32_C(  -765.08) },
3209       {              INT32_MIN,              INT32_MIN,  INT32_C(         718), -INT32_C(         765) } },
3210     #endif
3211     #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3212     { { HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
3213         HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
3214         HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
3215         HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100), },
3216       {             INT32_MIN,  INT32_C(  2147483520),              INT32_MIN, -INT32_C(  2147483520) } },
3217     #endif
3218     #if !defined(SIMDE_FAST_ROUND_TIES)
3219     { { SIMDE_FLOAT32_C(    -1.50), SIMDE_FLOAT32_C(     1.50), SIMDE_FLOAT32_C(    -2.50), SIMDE_FLOAT32_C(     2.50) },
3220       { -INT32_C(           2),  INT32_C(           2), -INT32_C(           2),  INT32_C(         2) } },
3221     { { SIMDE_FLOAT32_C(    -3.50), SIMDE_FLOAT32_C(     3.50), SIMDE_FLOAT32_C(    -4.50), SIMDE_FLOAT32_C(     4.50) },
3222       { -INT32_C(           4),  INT32_C(           4), -INT32_C(           4),  INT32_C(         4) } },
3223     #endif
3224     { { SIMDE_FLOAT32_C(   -95.52), SIMDE_FLOAT32_C(   603.57), SIMDE_FLOAT32_C(  -810.91), SIMDE_FLOAT32_C(   527.98) },
3225       { -INT32_C(          96),  INT32_C(         604), -INT32_C(         811),  INT32_C(         528) } },
3226     { { SIMDE_FLOAT32_C(  -768.18), SIMDE_FLOAT32_C(  -162.82), SIMDE_FLOAT32_C(  -159.43), SIMDE_FLOAT32_C(   588.60) },
3227       { -INT32_C(         768), -INT32_C(         163), -INT32_C(         159),  INT32_C(         589) } },
3228     { { SIMDE_FLOAT32_C(    84.90), SIMDE_FLOAT32_C(  -904.57), SIMDE_FLOAT32_C(  -209.20), SIMDE_FLOAT32_C(   264.55) },
3229       {  INT32_C(          85), -INT32_C(         905), -INT32_C(         209),  INT32_C(         265) } },
3230     { { SIMDE_FLOAT32_C(   -19.50), SIMDE_FLOAT32_C(  -416.92), SIMDE_FLOAT32_C(  -780.86), SIMDE_FLOAT32_C(   -31.81) },
3231       { -INT32_C(          20), -INT32_C(         417), -INT32_C(         781), -INT32_C(          32) } },
3232     { { SIMDE_FLOAT32_C(  -561.41), SIMDE_FLOAT32_C(  -689.14), SIMDE_FLOAT32_C(   434.56), SIMDE_FLOAT32_C(   432.69) },
3233       { -INT32_C(         561), -INT32_C(         689),  INT32_C(         435),  INT32_C(         433) } },
3234     { { SIMDE_FLOAT32_C(   170.13), SIMDE_FLOAT32_C(   594.22), SIMDE_FLOAT32_C(  -888.51), SIMDE_FLOAT32_C(   321.54) },
3235       {  INT32_C(         170),  INT32_C(         594), -INT32_C(         889),  INT32_C(         322) } },
3236     { { SIMDE_FLOAT32_C(   660.47), SIMDE_FLOAT32_C(  -124.04), SIMDE_FLOAT32_C(   493.83), SIMDE_FLOAT32_C(   250.16) },
3237       {  INT32_C(         660), -INT32_C(         124),  INT32_C(         494),  INT32_C(         250) } },
3238     { { SIMDE_FLOAT32_C(  -314.21), SIMDE_FLOAT32_C(   -16.38), SIMDE_FLOAT32_C(   852.78), SIMDE_FLOAT32_C(   590.27) },
3239       { -INT32_C(         314), -INT32_C(          16),  INT32_C(         853),  INT32_C(         590) } },
3240     { { SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0), SIMDE_FLOAT32_C(-2147483650.0) },
3241       { -INT32_C(  2147483648), -INT32_C(  2147483648), -INT32_C(  2147483648), -INT32_C(  2147483648) } },
3242     { { SIMDE_FLOAT32_C( 2147483649.0), SIMDE_FLOAT32_C( 2147483649.0), SIMDE_FLOAT32_C( 2147483649.0), SIMDE_FLOAT32_C( 2147483649.0) },
3243       { -INT32_C(  2147483648), -INT32_C(  2147483648), -INT32_C(  2147483648), -INT32_C(  2147483648) } }
3244   };
3245 
3246   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3247     simde__m128 a = simde_mm_loadu_ps(test_vec[i].a);
3248     simde__m128i r = simde_mm_cvtps_epi32(a);
3249     simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
3250   }
3251 
3252   return 0;
3253 }
3254 
3255 static int
test_simde_mm_cvtps_pd(SIMDE_MUNIT_TEST_ARGS)3256 test_simde_mm_cvtps_pd(SIMDE_MUNIT_TEST_ARGS) {
3257   const struct {
3258     simde__m128 a;
3259     simde__m128d r;
3260   } test_vec[8] = {
3261     { simde_mm_set_ps(SIMDE_FLOAT32_C(  295.41), SIMDE_FLOAT32_C( -909.65), SIMDE_FLOAT32_C(  156.64), SIMDE_FLOAT32_C( -802.16)),
3262       simde_mm_set_pd(SIMDE_FLOAT64_C(  156.64), SIMDE_FLOAT64_C( -802.16)) },
3263     { simde_mm_set_ps(SIMDE_FLOAT32_C(  649.83), SIMDE_FLOAT32_C( -763.68), SIMDE_FLOAT32_C(  364.80), SIMDE_FLOAT32_C(  389.19)),
3264       simde_mm_set_pd(SIMDE_FLOAT64_C(  364.80), SIMDE_FLOAT64_C(  389.19)) },
3265     { simde_mm_set_ps(SIMDE_FLOAT32_C(  269.92), SIMDE_FLOAT32_C( -207.13), SIMDE_FLOAT32_C(  538.63), SIMDE_FLOAT32_C(  487.11)),
3266       simde_mm_set_pd(SIMDE_FLOAT64_C(  538.63), SIMDE_FLOAT64_C(  487.11)) },
3267     { simde_mm_set_ps(SIMDE_FLOAT32_C( -982.29), SIMDE_FLOAT32_C(  234.64), SIMDE_FLOAT32_C(  -53.82), SIMDE_FLOAT32_C(  899.43)),
3268       simde_mm_set_pd(SIMDE_FLOAT64_C(  -53.82), SIMDE_FLOAT64_C(  899.43)) },
3269     { simde_mm_set_ps(SIMDE_FLOAT32_C(  945.89), SIMDE_FLOAT32_C(  -98.53), SIMDE_FLOAT32_C(    1.57), SIMDE_FLOAT32_C(   49.07)),
3270       simde_mm_set_pd(SIMDE_FLOAT64_C(    1.57), SIMDE_FLOAT64_C(   49.07)) },
3271     { simde_mm_set_ps(SIMDE_FLOAT32_C(  -47.73), SIMDE_FLOAT32_C(  806.42), SIMDE_FLOAT32_C(   11.76), SIMDE_FLOAT32_C(   -1.19)),
3272       simde_mm_set_pd(SIMDE_FLOAT64_C(   11.76), SIMDE_FLOAT64_C(   -1.19)) },
3273     { simde_mm_set_ps(SIMDE_FLOAT32_C( -961.08), SIMDE_FLOAT32_C( -192.05), SIMDE_FLOAT32_C(  553.30), SIMDE_FLOAT32_C( -994.71)),
3274       simde_mm_set_pd(SIMDE_FLOAT64_C(  553.30), SIMDE_FLOAT64_C( -994.71)) },
3275     { simde_mm_set_ps(SIMDE_FLOAT32_C(    9.30), SIMDE_FLOAT32_C( -203.20), SIMDE_FLOAT32_C( -196.20), SIMDE_FLOAT32_C(  707.05)),
3276       simde_mm_set_pd(SIMDE_FLOAT64_C( -196.20), SIMDE_FLOAT64_C(  707.05)) }
3277   };
3278 
3279   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3280     simde__m128d r = simde_mm_cvtps_pd(test_vec[i].a);
3281     simde_assert_m128d_close(r, test_vec[i].r, 1);
3282   }
3283 
3284   return 0;
3285 }
3286 
3287 static int
test_simde_mm_cvtsd_f64(SIMDE_MUNIT_TEST_ARGS)3288 test_simde_mm_cvtsd_f64(SIMDE_MUNIT_TEST_ARGS) {
3289   const struct {
3290     simde__m128d a;
3291     simde_float64 r;
3292   } test_vec[8] = {
3293     { simde_mm_set_pd(SIMDE_FLOAT64_C(  298.96), SIMDE_FLOAT64_C(   39.67)),
3294       SIMDE_FLOAT64_C(  39.67) },
3295     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -98.64), SIMDE_FLOAT64_C( -641.95)),
3296       SIMDE_FLOAT64_C(-641.95) },
3297     { simde_mm_set_pd(SIMDE_FLOAT64_C( -307.30), SIMDE_FLOAT64_C( -193.04)),
3298       SIMDE_FLOAT64_C(-193.04) },
3299     { simde_mm_set_pd(SIMDE_FLOAT64_C( -648.72), SIMDE_FLOAT64_C(  830.29)),
3300       SIMDE_FLOAT64_C( 830.29) },
3301     { simde_mm_set_pd(SIMDE_FLOAT64_C( -701.20), SIMDE_FLOAT64_C( -501.79)),
3302       SIMDE_FLOAT64_C(-501.79) },
3303     { simde_mm_set_pd(SIMDE_FLOAT64_C(  755.28), SIMDE_FLOAT64_C(  648.10)),
3304       SIMDE_FLOAT64_C( 648.10) },
3305     { simde_mm_set_pd(SIMDE_FLOAT64_C( -664.63), SIMDE_FLOAT64_C(  220.54)),
3306       SIMDE_FLOAT64_C( 220.54) },
3307     { simde_mm_set_pd(SIMDE_FLOAT64_C( -762.92), SIMDE_FLOAT64_C( -101.29)),
3308       SIMDE_FLOAT64_C(-101.29) }
3309   };
3310 
3311   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3312     simde_float64 r = simde_mm_cvtsd_f64(test_vec[i].a);
3313     simde_assert_equal_f64(r, test_vec[i].r, 2);
3314   }
3315 
3316   return 0;
3317 }
3318 
3319 static int
test_simde_mm_cvtsd_si32(SIMDE_MUNIT_TEST_ARGS)3320 test_simde_mm_cvtsd_si32 (SIMDE_MUNIT_TEST_ARGS) {
3321   static const struct {
3322     const simde_float64 a[2];
3323     const int32_t r;
3324   } test_vec[] = {
3325     #if !defined(SIMDE_FAST_NANS)
3326     { {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -162.87) },
3327                  INT32_MIN },
3328     { {            -SIMDE_MATH_NAN, SIMDE_FLOAT64_C(  -905.13) },
3329                  INT32_MIN },
3330     #endif
3331     #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3332     { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1), SIMDE_FLOAT64_C(   177.40) },
3333                   INT32_MIN },
3334     { { HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100, SIMDE_FLOAT64_C(  -906.88) },
3335        INT32_C(  2147483547) },
3336     { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1), SIMDE_FLOAT64_C(   676.90) },
3337                    INT32_MIN },
3338     { { HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100, SIMDE_FLOAT64_C(  -848.13) },
3339       -INT32_C(  2147483548) },
3340     #endif
3341     { { SIMDE_FLOAT64_C(   353.29), SIMDE_FLOAT64_C(   -16.32) },
3342        INT32_C(         353) },
3343     { { SIMDE_FLOAT64_C(   477.70), SIMDE_FLOAT64_C(  -131.04) },
3344        INT32_C(         478) },
3345     { { SIMDE_FLOAT64_C(  -314.42), SIMDE_FLOAT64_C(  -351.80) },
3346       -INT32_C(         314) },
3347     { { SIMDE_FLOAT64_C(  -574.04), SIMDE_FLOAT64_C(  -761.46) },
3348       -INT32_C(         574) },
3349     { { SIMDE_FLOAT64_C(  -428.08), SIMDE_FLOAT64_C(   959.55) },
3350       -INT32_C(         428) },
3351     { { SIMDE_FLOAT64_C(   453.56), SIMDE_FLOAT64_C(  -261.91) },
3352        INT32_C(         454) }
3353   };
3354 
3355   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3356     simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
3357     int32_t r = simde_mm_cvtsd_si32(a);
3358     simde_assert_equal_i32(r, test_vec[i].r);
3359   }
3360 
3361   return 0;
3362 }
3363 
3364 static int
test_simde_mm_cvtsd_si64(SIMDE_MUNIT_TEST_ARGS)3365 test_simde_mm_cvtsd_si64(SIMDE_MUNIT_TEST_ARGS) {
3366   const struct {
3367     simde__m128d a;
3368     int64_t r;
3369   } test_vec[8] = {
3370     { simde_mm_set_pd(SIMDE_FLOAT64_C(  793.30), SIMDE_FLOAT64_C( -706.75)), -707 },
3371     { simde_mm_set_pd(SIMDE_FLOAT64_C(   29.13), SIMDE_FLOAT64_C( -309.00)), -309 },
3372     { simde_mm_set_pd(SIMDE_FLOAT64_C(   21.24), SIMDE_FLOAT64_C(  368.17)),  368 },
3373     { simde_mm_set_pd(SIMDE_FLOAT64_C( -595.30), SIMDE_FLOAT64_C(  351.60)),  352 },
3374     { simde_mm_set_pd(SIMDE_FLOAT64_C( -640.13), SIMDE_FLOAT64_C( -466.84)), -467 },
3375     { simde_mm_set_pd(SIMDE_FLOAT64_C( -237.20), SIMDE_FLOAT64_C( -994.72)), -995 },
3376     { simde_mm_set_pd(SIMDE_FLOAT64_C( -983.23), SIMDE_FLOAT64_C(  645.14)),  645 },
3377     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -89.10), SIMDE_FLOAT64_C(  585.69)),  586 }
3378   };
3379 
3380   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3381     int64_t r = simde_mm_cvtsd_si64(test_vec[i].a);
3382     simde_assert_equal_i64(r, test_vec[i].r);
3383   }
3384 
3385   return 0;
3386 }
3387 
3388 static int
test_simde_mm_cvtsd_ss(SIMDE_MUNIT_TEST_ARGS)3389 test_simde_mm_cvtsd_ss(SIMDE_MUNIT_TEST_ARGS) {
3390   const struct {
3391     simde__m128 a;
3392     simde__m128d b;
3393     simde__m128 r;
3394   } test_vec[8] = {
3395     { simde_mm_set_ps(SIMDE_FLOAT32_C(  522.41), SIMDE_FLOAT32_C(  122.44), SIMDE_FLOAT32_C(  708.76), SIMDE_FLOAT32_C(  910.97)),
3396       simde_mm_set_pd(SIMDE_FLOAT64_C(  -52.04), SIMDE_FLOAT64_C(  228.75)),
3397       simde_mm_set_ps(SIMDE_FLOAT32_C(  522.41), SIMDE_FLOAT32_C(  122.44), SIMDE_FLOAT32_C(  708.76), SIMDE_FLOAT32_C(  228.75)) },
3398     { simde_mm_set_ps(SIMDE_FLOAT32_C( -311.44), SIMDE_FLOAT32_C(  267.00), SIMDE_FLOAT32_C(  965.23), SIMDE_FLOAT32_C( -248.92)),
3399       simde_mm_set_pd(SIMDE_FLOAT64_C(  -89.48), SIMDE_FLOAT64_C(  178.71)),
3400       simde_mm_set_ps(SIMDE_FLOAT32_C( -311.44), SIMDE_FLOAT32_C(  267.00), SIMDE_FLOAT32_C(  965.23), SIMDE_FLOAT32_C(  178.71)) },
3401     { simde_mm_set_ps(SIMDE_FLOAT32_C(  252.50), SIMDE_FLOAT32_C(  744.35), SIMDE_FLOAT32_C(  237.50), SIMDE_FLOAT32_C(  713.77)),
3402       simde_mm_set_pd(SIMDE_FLOAT64_C( -913.96), SIMDE_FLOAT64_C(  935.45)),
3403       simde_mm_set_ps(SIMDE_FLOAT32_C(  252.50), SIMDE_FLOAT32_C(  744.35), SIMDE_FLOAT32_C(  237.50), SIMDE_FLOAT32_C(  935.45)) },
3404     { simde_mm_set_ps(SIMDE_FLOAT32_C(  781.08), SIMDE_FLOAT32_C(  -50.03), SIMDE_FLOAT32_C( -658.11), SIMDE_FLOAT32_C(  945.59)),
3405       simde_mm_set_pd(SIMDE_FLOAT64_C( -556.84), SIMDE_FLOAT64_C(  452.90)),
3406       simde_mm_set_ps(SIMDE_FLOAT32_C(  781.08), SIMDE_FLOAT32_C(  -50.03), SIMDE_FLOAT32_C( -658.11), SIMDE_FLOAT32_C(  452.90)) },
3407     { simde_mm_set_ps(SIMDE_FLOAT32_C(  459.13), SIMDE_FLOAT32_C(  794.72), SIMDE_FLOAT32_C(  105.91), SIMDE_FLOAT32_C(  688.90)),
3408       simde_mm_set_pd(SIMDE_FLOAT64_C( -123.20), SIMDE_FLOAT64_C(  469.36)),
3409       simde_mm_set_ps(SIMDE_FLOAT32_C(  459.13), SIMDE_FLOAT32_C(  794.72), SIMDE_FLOAT32_C(  105.91), SIMDE_FLOAT32_C(  469.36)) },
3410     { simde_mm_set_ps(SIMDE_FLOAT32_C( -699.67), SIMDE_FLOAT32_C(  751.26), SIMDE_FLOAT32_C(   72.14), SIMDE_FLOAT32_C( -162.03)),
3411       simde_mm_set_pd(SIMDE_FLOAT64_C(  868.66), SIMDE_FLOAT64_C(  138.18)),
3412       simde_mm_set_ps(SIMDE_FLOAT32_C( -699.67), SIMDE_FLOAT32_C(  751.26), SIMDE_FLOAT32_C(   72.14), SIMDE_FLOAT32_C(  138.18)) },
3413     { simde_mm_set_ps(SIMDE_FLOAT32_C( -144.80), SIMDE_FLOAT32_C(  372.44), SIMDE_FLOAT32_C( -878.31), SIMDE_FLOAT32_C(  984.43)),
3414       simde_mm_set_pd(SIMDE_FLOAT64_C( -559.54), SIMDE_FLOAT64_C(  112.58)),
3415       simde_mm_set_ps(SIMDE_FLOAT32_C( -144.80), SIMDE_FLOAT32_C(  372.44), SIMDE_FLOAT32_C( -878.31), SIMDE_FLOAT32_C(  112.58)) },
3416     { simde_mm_set_ps(SIMDE_FLOAT32_C( -958.65), SIMDE_FLOAT32_C(  333.33), SIMDE_FLOAT32_C( -940.30), SIMDE_FLOAT32_C(  396.81)),
3417       simde_mm_set_pd(SIMDE_FLOAT64_C(  263.65), SIMDE_FLOAT64_C(  199.76)),
3418       simde_mm_set_ps(SIMDE_FLOAT32_C( -958.65), SIMDE_FLOAT32_C(  333.33), SIMDE_FLOAT32_C( -940.30), SIMDE_FLOAT32_C(  199.76)) }
3419   };
3420 
3421   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3422     simde__m128 r = simde_mm_cvtsd_ss(test_vec[i].a, test_vec[i].b);
3423     simde_assert_m128_close(r, test_vec[i].r, 1);
3424   }
3425 
3426   return 0;
3427 }
3428 
static int
test_simde_x_mm_cvtsi128_si16 (SIMDE_MUNIT_TEST_ARGS) {
  /* Checks simde_x_mm_cvtsi128_si16 against a fixed table.
   *
   * Each input is eight int16_t values loaded with simde_x_mm_loadu_epi16;
   * in every vector the expected scalar equals a[0]. */
  static const struct {
    const int16_t a[8];
    const int16_t r;
  } test_vec[] = {
    { { -INT16_C( 30955),  INT16_C(   704), -INT16_C( 12934), -INT16_C( 10158),  INT16_C( 23505),  INT16_C( 18623), -INT16_C( 30715),  INT16_C( 30631) },
      -INT16_C( 30955) },
    { { -INT16_C( 18124), -INT16_C(  9599), -INT16_C( 23005), -INT16_C(  5882), -INT16_C( 24114),  INT16_C( 22410),  INT16_C( 23298),  INT16_C(  6106) },
      -INT16_C( 18124) },
    { { -INT16_C( 25630),  INT16_C( 23577),  INT16_C( 27496),  INT16_C( 14645), -INT16_C(  2874), -INT16_C( 13439),  INT16_C( 10620), -INT16_C( 20158) },
      -INT16_C( 25630) },
    { { -INT16_C( 15390),  INT16_C(  1675), -INT16_C( 28310),  INT16_C( 14575),  INT16_C( 31026),  INT16_C( 13455),  INT16_C( 27348), -INT16_C( 18613) },
      -INT16_C( 15390) },
    { {  INT16_C( 25605),  INT16_C( 27923),  INT16_C( 18639), -INT16_C( 27226),  INT16_C( 10301), -INT16_C( 18079), -INT16_C( 23727),  INT16_C( 13162) },
       INT16_C( 25605) },
    { { -INT16_C(  2713), -INT16_C( 11975),  INT16_C( 10630), -INT16_C( 18423), -INT16_C( 26206),  INT16_C( 30700),  INT16_C( 14083),  INT16_C(  2094) },
      -INT16_C(  2713) },
    { {  INT16_C( 16795),  INT16_C( 27253),  INT16_C(  7050), -INT16_C( 14592),  INT16_C( 24899), -INT16_C( 27520), -INT16_C(  5372),  INT16_C( 27592) },
       INT16_C( 16795) },
    { {  INT16_C(   480),  INT16_C( 26428),  INT16_C( 17962), -INT16_C( 13025),  INT16_C(  3295), -INT16_C(  7612),  INT16_C( 29251), -INT16_C(  8214) },
       INT16_C(   480) }
  };
  const size_t count = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < count ; ++i) {
    /* Load and convert in one expression; no separate input temporary. */
    int16_t r = simde_x_mm_cvtsi128_si16(simde_x_mm_loadu_epi16(test_vec[i].a));
    simde_assert_equal_i16(r, test_vec[i].r);
  }

  return 0;
}
3461 
3462 static int
test_simde_mm_cvtsi128_si32(SIMDE_MUNIT_TEST_ARGS)3463 test_simde_mm_cvtsi128_si32(SIMDE_MUNIT_TEST_ARGS) {
3464   const struct {
3465     simde__m128i a;
3466     int32_t r;
3467   } test_vec[8] = {
3468     { simde_mm_set_epi32(   26453550,  -127780894,   765191664, -1527053336), -1527053336 },
3469     { simde_mm_set_epi32(-2072408746,   654549051, -1035182329,  -310311602),  -310311602 },
3470     { simde_mm_set_epi32(-1491944780,  -848128842,   200170171,  -471300206),  -471300206 },
3471     { simde_mm_set_epi32(-1218501110,   680592926,  -869682471,  -297305797),  -297305797 },
3472     { simde_mm_set_epi32(-1884581495,  -571508262,  -111379645, -1274133785), -1274133785 },
3473     { simde_mm_set_epi32(  486988098,   416284528,  1359642222,   197671232),   197671232 },
3474     { simde_mm_set_epi32(  296562088, -1151305617, -1413122888, -1640910233), -1640910233 },
3475     { simde_mm_set_epi32(-1262725255, -1253335394,   -91416000, -1892793314), -1892793314 }
3476   };
3477 
3478   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3479     int32_t r = simde_mm_cvtsi128_si32(test_vec[i].a);
3480     simde_assert_equal_i32(r, test_vec[i].r);
3481   }
3482 
3483   return 0;
3484 }
3485 
3486 static int
test_simde_mm_cvtsi64_si128(SIMDE_MUNIT_TEST_ARGS)3487 test_simde_mm_cvtsi64_si128(SIMDE_MUNIT_TEST_ARGS) {
3488   const struct {
3489     int64_t a;
3490     simde__m128i r;
3491   } test_vec[8] = {
3492     { INT64_C(   6168135010467220065),
3493       simde_mm_set_epi64x(INT64_C(                     0), INT64_C(   6168135010467220065)) },
3494     { INT64_C(   3895170522828645721),
3495       simde_mm_set_epi64x(INT64_C(                     0), INT64_C(   3895170522828645721)) },
3496     { INT64_C(  -3378210069702593578),
3497       simde_mm_set_epi64x(INT64_C(                     0), INT64_C(  -3378210069702593578)) },
3498     { INT64_C(   2750396577149404222),
3499       simde_mm_set_epi64x(INT64_C(                     0), INT64_C(   2750396577149404222)) },
3500     { INT64_C(   1438311486113044813),
3501       simde_mm_set_epi64x(INT64_C(                     0), INT64_C(   1438311486113044813)) },
3502     { INT64_C(   3416877519561179684),
3503       simde_mm_set_epi64x(INT64_C(                     0), INT64_C(   3416877519561179684)) },
3504     { INT64_C(   5633937201227624265),
3505       simde_mm_set_epi64x(INT64_C(                     0), INT64_C(   5633937201227624265)) },
3506     { INT64_C(  -3544191055453826903),
3507       simde_mm_set_epi64x(INT64_C(                     0), INT64_C(  -3544191055453826903)) }
3508   };
3509 
3510   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3511     simde__m128i r = simde_mm_cvtsi64_si128(test_vec[i].a);
3512     simde_assert_m128i_i64(r, ==, test_vec[i].r);
3513   }
3514 
3515   return 0;
3516 }
3517 
3518 static int
test_simde_mm_cvtsi128_si64(SIMDE_MUNIT_TEST_ARGS)3519 test_simde_mm_cvtsi128_si64(SIMDE_MUNIT_TEST_ARGS) {
3520   const struct {
3521     simde__m128i a;
3522     int64_t r;
3523   } test_vec[8] = {
3524     { simde_mm_set_epi64x(INT64_C(   6773505374496819552), INT64_C(   4667650958864037640)),
3525       INT64_C(   4667650958864037640) },
3526     { simde_mm_set_epi64x(INT64_C(   1327994882711935975), INT64_C(   6055234041306631062)),
3527       INT64_C(   6055234041306631062) },
3528     { simde_mm_set_epi64x(INT64_C(   8972445642279437044), INT64_C(  -4761409530754735793)),
3529       INT64_C(  -4761409530754735793) },
3530     { simde_mm_set_epi64x(INT64_C(   7460890732678939925), INT64_C(   5266150742597997743)),
3531       INT64_C(   5266150742597997743) },
3532     { simde_mm_set_epi64x(INT64_C(  -6075061397734634308), INT64_C(    487741331498539771)),
3533       INT64_C(    487741331498539771) },
3534     { simde_mm_set_epi64x(INT64_C(   2874947710909797095), INT64_C(   2287065406213692181)),
3535       INT64_C(   2287065406213692181) },
3536     { simde_mm_set_epi64x(INT64_C(   8598185467708417568), INT64_C(  -2745610728130306920)),
3537       INT64_C(  -2745610728130306920) },
3538     { simde_mm_set_epi64x(INT64_C(   6122366414867950497), INT64_C(    614503884136124395)),
3539       INT64_C(    614503884136124395) }
3540   };
3541 
3542   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3543     int64_t r = simde_mm_cvtsi128_si64(test_vec[i].a);
3544     simde_assert_equal_i64(r, test_vec[i].r);
3545   }
3546 
3547   return 0;
3548 }
3549 
3550 static int
test_simde_x_mm_cvtsi16_si128(SIMDE_MUNIT_TEST_ARGS)3551 test_simde_x_mm_cvtsi16_si128 (SIMDE_MUNIT_TEST_ARGS) {
3552   static const struct {
3553     const int16_t a;
3554     const int16_t r[8];
3555   } test_vec[] = {
3556     { -INT16_C( 17602),
3557       { -INT16_C( 17602),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
3558     {  INT16_C( 26279),
3559       {  INT16_C( 26279),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
3560     { -INT16_C( 15939),
3561       { -INT16_C( 15939),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
3562     { -INT16_C(  9973),
3563       { -INT16_C(  9973),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
3564     { -INT16_C(  7532),
3565       { -INT16_C(  7532),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
3566     {  INT16_C(  4549),
3567       {  INT16_C(  4549),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
3568     {  INT16_C(  6325),
3569       {  INT16_C(  6325),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
3570     { -INT16_C(  6958),
3571       { -INT16_C(  6958),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } }
3572   };
3573 
3574   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3575     int16_t a = test_vec[i].a;
3576     simde__m128i r = simde_x_mm_cvtsi16_si128(a);
3577     simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r));
3578   }
3579 
3580   return 0;
3581 }
3582 
3583 static int
test_simde_mm_cvtsi32_sd(SIMDE_MUNIT_TEST_ARGS)3584 test_simde_mm_cvtsi32_sd(SIMDE_MUNIT_TEST_ARGS) {
3585   const struct {
3586     simde__m128d a;
3587     int b;
3588     simde__m128d r;
3589   } test_vec[8] = {
3590     { simde_mm_set_pd(SIMDE_FLOAT64_C( -601.07), SIMDE_FLOAT64_C(  516.34)),
3591       -768,
3592       simde_mm_set_pd(SIMDE_FLOAT64_C( -601.07), SIMDE_FLOAT64_C( -768.00)) },
3593     { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.83), SIMDE_FLOAT64_C(  135.41)),
3594       -383,
3595       simde_mm_set_pd(SIMDE_FLOAT64_C( -939.83), SIMDE_FLOAT64_C( -383.00)) },
3596     { simde_mm_set_pd(SIMDE_FLOAT64_C(  503.65), SIMDE_FLOAT64_C(  859.44)),
3597        872,
3598       simde_mm_set_pd(SIMDE_FLOAT64_C(  503.65), SIMDE_FLOAT64_C(  872.00)) },
3599     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -76.44), SIMDE_FLOAT64_C(  854.87)),
3600        613,
3601       simde_mm_set_pd(SIMDE_FLOAT64_C(  -76.44), SIMDE_FLOAT64_C(  613.00)) },
3602     { simde_mm_set_pd(SIMDE_FLOAT64_C(  520.67), SIMDE_FLOAT64_C( -993.40)),
3603        197,
3604       simde_mm_set_pd(SIMDE_FLOAT64_C(  520.67), SIMDE_FLOAT64_C(  197.00)) },
3605     { simde_mm_set_pd(SIMDE_FLOAT64_C(  461.59), SIMDE_FLOAT64_C( -572.51)),
3606       -157,
3607       simde_mm_set_pd(SIMDE_FLOAT64_C(  461.59), SIMDE_FLOAT64_C( -157.00)) },
3608     { simde_mm_set_pd(SIMDE_FLOAT64_C( -113.22), SIMDE_FLOAT64_C(  791.22)),
3609       -840,
3610       simde_mm_set_pd(SIMDE_FLOAT64_C( -113.22), SIMDE_FLOAT64_C( -840.00)) },
3611     { simde_mm_set_pd(SIMDE_FLOAT64_C(  707.47), SIMDE_FLOAT64_C(  954.02)),
3612       -347,
3613       simde_mm_set_pd(SIMDE_FLOAT64_C(  707.47), SIMDE_FLOAT64_C( -347.00)) }
3614   };
3615 
3616   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3617     simde__m128d r = simde_mm_cvtsi32_sd(test_vec[i].a, test_vec[i].b);
3618     simde_assert_m128d_close(r, test_vec[i].r, 1);
3619   }
3620 
3621   return 0;
3622 }
3623 
3624 static int
test_simde_mm_cvtsi32_si128(SIMDE_MUNIT_TEST_ARGS)3625 test_simde_mm_cvtsi32_si128(SIMDE_MUNIT_TEST_ARGS) {
3626   const struct {
3627     int32_t a;
3628     simde__m128i r;
3629   } test_vec[8] = {
3630     {   306582644, simde_mm_set_epi32(0, 0, 0,  306582644) },
3631     {  -365974780, simde_mm_set_epi32(0, 0, 0, -365974780) },
3632     {   -85065628, simde_mm_set_epi32(0, 0, 0,  -85065628) },
3633     {  1053254834, simde_mm_set_epi32(0, 0, 0, 1053254834) },
3634     {  -236294791, simde_mm_set_epi32(0, 0, 0, -236294791) },
3635     {  1341442607, simde_mm_set_epi32(0, 0, 0, 1341442607) },
3636     {   336976017, simde_mm_set_epi32(0, 0, 0,  336976017) },
3637     {  1400276059, simde_mm_set_epi32(0, 0, 0, 1400276059) }
3638   };
3639 
3640   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3641     simde__m128i r = simde_mm_cvtsi32_si128(test_vec[i].a);
3642     simde_assert_m128i_i32(r, ==, test_vec[i].r);
3643   }
3644 
3645   return 0;
3646 }
3647 
3648 static int
test_simde_mm_cvtsi64_sd(SIMDE_MUNIT_TEST_ARGS)3649 test_simde_mm_cvtsi64_sd(SIMDE_MUNIT_TEST_ARGS) {
3650   const struct {
3651     simde__m128d a;
3652     int64_t b;
3653     simde__m128d r;
3654   } test_vec[8] = {
3655     { simde_mm_set_pd(SIMDE_FLOAT64_C(  229.24), SIMDE_FLOAT64_C(  177.04)),  637,
3656       simde_mm_set_pd(SIMDE_FLOAT64_C(  229.24), SIMDE_FLOAT64_C(  637.00)) },
3657     { simde_mm_set_pd(SIMDE_FLOAT64_C(  401.10), SIMDE_FLOAT64_C(  284.52)), -162,
3658       simde_mm_set_pd(SIMDE_FLOAT64_C(  401.10), SIMDE_FLOAT64_C( -162.00)) },
3659     { simde_mm_set_pd(SIMDE_FLOAT64_C(  499.46), SIMDE_FLOAT64_C(  321.47)), -540,
3660       simde_mm_set_pd(SIMDE_FLOAT64_C(  499.46), SIMDE_FLOAT64_C( -540.00)) },
3661     { simde_mm_set_pd(SIMDE_FLOAT64_C( -710.92), SIMDE_FLOAT64_C(  858.14)),  -64,
3662       simde_mm_set_pd(SIMDE_FLOAT64_C( -710.92), SIMDE_FLOAT64_C(  -64.00)) },
3663     { simde_mm_set_pd(SIMDE_FLOAT64_C( -289.27), SIMDE_FLOAT64_C( -887.54)), -238,
3664       simde_mm_set_pd(SIMDE_FLOAT64_C( -289.27), SIMDE_FLOAT64_C( -238.00)) },
3665     { simde_mm_set_pd(SIMDE_FLOAT64_C(  865.34), SIMDE_FLOAT64_C(  242.15)),  121,
3666       simde_mm_set_pd(SIMDE_FLOAT64_C(  865.34), SIMDE_FLOAT64_C(  121.00)) },
3667     { simde_mm_set_pd(SIMDE_FLOAT64_C( -376.10), SIMDE_FLOAT64_C( -965.52)),  315,
3668       simde_mm_set_pd(SIMDE_FLOAT64_C( -376.10), SIMDE_FLOAT64_C(  315.00)) },
3669     { simde_mm_set_pd(SIMDE_FLOAT64_C(  673.51), SIMDE_FLOAT64_C( -882.88)),  -72,
3670       simde_mm_set_pd(SIMDE_FLOAT64_C(  673.51), SIMDE_FLOAT64_C(  -72.00)) }
3671   };
3672 
3673   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3674     simde__m128d r = simde_mm_cvtsi64_sd(test_vec[i].a, test_vec[i].b);
3675     simde_assert_m128d_close(r, test_vec[i].r, 1);
3676   }
3677 
3678   return 0;
3679 }
3680 
3681 static int
test_simde_mm_cvtss_sd(SIMDE_MUNIT_TEST_ARGS)3682 test_simde_mm_cvtss_sd(SIMDE_MUNIT_TEST_ARGS) {
3683   const struct {
3684     simde__m128d a;
3685     simde__m128 b;
3686     simde__m128d r;
3687   } test_vec[8] = {
3688     { simde_mm_set_pd(SIMDE_FLOAT64_C( 546.48), SIMDE_FLOAT64_C( 729.20)),
3689       simde_mm_set_ps(SIMDE_FLOAT32_C( 142.68), SIMDE_FLOAT32_C( -75.76), SIMDE_FLOAT32_C(-648.72), SIMDE_FLOAT32_C( 148.36)),
3690       simde_mm_set_pd(SIMDE_FLOAT64_C( 546.48), SIMDE_FLOAT64_C( 148.36)) },
3691     { simde_mm_set_pd(SIMDE_FLOAT64_C(  79.60), SIMDE_FLOAT64_C( 648.49)),
3692       simde_mm_set_ps(SIMDE_FLOAT32_C( 631.34), SIMDE_FLOAT32_C( 902.53), SIMDE_FLOAT32_C( -54.65), SIMDE_FLOAT32_C( 614.98)),
3693       simde_mm_set_pd(SIMDE_FLOAT64_C(  79.60), SIMDE_FLOAT64_C( 614.98)) },
3694     { simde_mm_set_pd(SIMDE_FLOAT64_C( 811.47), SIMDE_FLOAT64_C( -95.71)),
3695       simde_mm_set_ps(SIMDE_FLOAT32_C(  13.27), SIMDE_FLOAT32_C( 315.63), SIMDE_FLOAT32_C( 407.80), SIMDE_FLOAT32_C(-826.61)),
3696       simde_mm_set_pd(SIMDE_FLOAT64_C( 811.47), SIMDE_FLOAT64_C(-826.61)) },
3697     { simde_mm_set_pd(SIMDE_FLOAT64_C( 315.42), SIMDE_FLOAT64_C( -69.06)),
3698       simde_mm_set_ps(SIMDE_FLOAT32_C( 775.15), SIMDE_FLOAT32_C( 935.54), SIMDE_FLOAT32_C(-964.44), SIMDE_FLOAT32_C( 659.62)),
3699       simde_mm_set_pd(SIMDE_FLOAT64_C( 315.42), SIMDE_FLOAT64_C( 659.62)) },
3700     { simde_mm_set_pd(SIMDE_FLOAT64_C(-579.75), SIMDE_FLOAT64_C(-291.65)),
3701       simde_mm_set_ps(SIMDE_FLOAT32_C( 533.61), SIMDE_FLOAT32_C( 565.53), SIMDE_FLOAT32_C( -36.93), SIMDE_FLOAT32_C(  57.54)),
3702       simde_mm_set_pd(SIMDE_FLOAT64_C(-579.75), SIMDE_FLOAT64_C(  57.54)) },
3703     { simde_mm_set_pd(SIMDE_FLOAT64_C( 979.09), SIMDE_FLOAT64_C(-471.44)),
3704       simde_mm_set_ps(SIMDE_FLOAT32_C( 927.62), SIMDE_FLOAT32_C( 955.93), SIMDE_FLOAT32_C(-964.80), SIMDE_FLOAT32_C( 823.88)),
3705       simde_mm_set_pd(SIMDE_FLOAT64_C( 979.09), SIMDE_FLOAT64_C( 823.88)) },
3706     { simde_mm_set_pd(SIMDE_FLOAT64_C( 172.44), SIMDE_FLOAT64_C(-427.74)),
3707       simde_mm_set_ps(SIMDE_FLOAT32_C(-343.18), SIMDE_FLOAT32_C(-352.03), SIMDE_FLOAT32_C(-836.30), SIMDE_FLOAT32_C( -61.82)),
3708       simde_mm_set_pd(SIMDE_FLOAT64_C( 172.44), SIMDE_FLOAT64_C( -61.82)) },
3709     { simde_mm_set_pd(SIMDE_FLOAT64_C( 975.42), SIMDE_FLOAT64_C( 394.72)),
3710       simde_mm_set_ps(SIMDE_FLOAT32_C( 748.90), SIMDE_FLOAT32_C(-410.84), SIMDE_FLOAT32_C( 636.92), SIMDE_FLOAT32_C( 230.31)),
3711       simde_mm_set_pd(SIMDE_FLOAT64_C( 975.42), SIMDE_FLOAT64_C( 230.31)) }
3712   };
3713 
3714   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3715     simde__m128d r = simde_mm_cvtss_sd(test_vec[i].a, test_vec[i].b);
3716     simde_assert_m128d_close(r, test_vec[i].r, 2);
3717   }
3718 
3719   return 0;
3720 }
3721 
3722 static int
test_simde_mm_cvttpd_epi32(SIMDE_MUNIT_TEST_ARGS)3723 test_simde_mm_cvttpd_epi32 (SIMDE_MUNIT_TEST_ARGS) {
3724   static const struct {
3725     const simde_float64 a[2];
3726     const int32_t r[4];
3727   } test_vec[] = {
3728     #if !defined(SIMDE_FAST_NANS)
3729     { {             SIMDE_MATH_NAN,           -SIMDE_MATH_NAN },
3730       {            INT32_MIN,              INT32_MIN,  INT32_C(           0),  INT32_C(           0) } },
3731     #endif
3732     #if !defined(SIMDE_FAST_CONVERSION_RANGE) && 0
3733     { { SIMDE_FLOAT64_C(   524.21), SIMDE_FLOAT64_C(    51.51) },
3734       {  INT32_C(         524),  INT32_C(          51),  INT32_C(           0),  INT32_C(           0) } },
3735     { { SIMDE_FLOAT64_C(   146.80), SIMDE_FLOAT64_C(  -434.11) },
3736       {  INT32_C(         146), -INT32_C(         434),  INT32_C(           0),  INT32_C(           0) } },
3737     { { SIMDE_FLOAT64_C(  -150.72), SIMDE_FLOAT64_C(   743.64) },
3738     #endif
3739     { { SIMDE_FLOAT64_C(   788.74), SIMDE_FLOAT64_C(   212.17) },
3740       {  INT32_C(         788),  INT32_C(         212),  INT32_C(           0),  INT32_C(           0) } },
3741     { { SIMDE_FLOAT64_C(  -172.36), SIMDE_FLOAT64_C(   455.86) },
3742       { -INT32_C(         172),  INT32_C(         455),  INT32_C(           0),  INT32_C(           0) } },
3743     { { SIMDE_FLOAT64_C(  -728.09), SIMDE_FLOAT64_C(   893.73) },
3744       { -INT32_C(         728),  INT32_C(         893),  INT32_C(           0),  INT32_C(           0) } },
3745     { { SIMDE_FLOAT64_C(   333.21), SIMDE_FLOAT64_C(  -914.29) },
3746       {  INT32_C(         333), -INT32_C(         914),  INT32_C(           0),  INT32_C(           0) } },
3747     { { SIMDE_FLOAT64_C(     0.95), SIMDE_FLOAT64_C(   701.07) },
3748       {  INT32_C(           0),  INT32_C(         701),  INT32_C(           0),  INT32_C(           0) } },
3749     { { SIMDE_FLOAT64_C(   639.75), SIMDE_FLOAT64_C(  -803.13) },
3750       {  INT32_C(         639), -INT32_C(         803),  INT32_C(           0),  INT32_C(           0) } }
3751   };
3752 
3753   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3754     simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
3755     simde__m128i r = simde_mm_cvttpd_epi32(a);
3756     simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
3757   }
3758 
3759   return 0;
3760 }
3761 
3762 static int
3763 test_simde_mm_cvttpd_pi32 (SIMDE_MUNIT_TEST_ARGS) {
3764   static const struct {
3765     const simde_float64 a[2];
3766     const int32_t r[2];
3767   } test_vec[] = {
3768     #if !defined(SIMDE_FAST_NANS)
3769     { {             SIMDE_MATH_NAN,           -SIMDE_MATH_NAN },
3770       {            INT32_MIN,              INT32_MIN } },
3771     #endif
3772     #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3773     { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
3774         HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100) },
3775       {             INT32_MIN,   INT32_C(   2147483547) } },
3776     { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
3777         HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100) },
3778       {             INT32_MIN,  -INT32_C(   2147483548) } },
3779     #endif
3780     { { SIMDE_FLOAT64_C(   788.74), SIMDE_FLOAT64_C(   212.17) },
3781       {  INT32_C(         788),  INT32_C(         212) } },
3782     { { SIMDE_FLOAT64_C(  -172.36), SIMDE_FLOAT64_C(   455.86) },
3783       { -INT32_C(         172),  INT32_C(         455) } },
3784     { { SIMDE_FLOAT64_C(  -728.09), SIMDE_FLOAT64_C(   893.73) },
3785       { -INT32_C(         728),  INT32_C(         893) } },
3786     { { SIMDE_FLOAT64_C(   333.21), SIMDE_FLOAT64_C(  -914.29) },
3787       {  INT32_C(         333), -INT32_C(         914) } },
3788     { { SIMDE_FLOAT64_C(     0.95), SIMDE_FLOAT64_C(   701.07) },
3789       {  INT32_C(           0),  INT32_C(         701) } },
3790     { { SIMDE_FLOAT64_C(   639.75), SIMDE_FLOAT64_C(  -803.13) },
3791       {  INT32_C(         639), -INT32_C(         803) } }
3792   };
3793 
3794   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3795     simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
3796     simde__m64 r = simde_mm_cvttpd_pi32(a);
3797     simde_test_x86_assert_equal_i32x2(r, simde_x_mm_load_si64(test_vec[i].r));
3798   }
3799 
3800   return 0;
3801 }
3802 
3803 static int
3804 test_simde_mm_cvttps_epi32 (SIMDE_MUNIT_TEST_ARGS) {
3805   static const struct {
3806     const simde_float32 a[4];
3807     const int32_t r[4];
3808   } test_vec[] = {
3809     #if !defined(SIMDE_FAST_NANS)
3810     { {            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -859.90),            SIMDE_MATH_NANF, SIMDE_FLOAT32_C(  -330.33) },
3811       {            INT32_MIN, -INT32_C(         859),            INT32_MIN, -INT32_C(         330) } },
3812     #endif
3813     #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3814     { { HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
3815         HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
3816         HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
3817         HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100) },
3818       {            INT32_MIN,  INT32_C(   2147483520),             INT32_MIN, -INT32_C(    2147483520) } },
3819     #endif
3820     { { SIMDE_FLOAT32_C(   884.61), SIMDE_FLOAT32_C(   424.21), SIMDE_FLOAT32_C(   434.23), SIMDE_FLOAT32_C(  -865.32) },
3821       {  INT32_C(         884),  INT32_C(         424),  INT32_C(         434), -INT32_C(         865) } },
3822     { { SIMDE_FLOAT32_C(     3.17), SIMDE_FLOAT32_C(  -163.40), SIMDE_FLOAT32_C(  -490.56), SIMDE_FLOAT32_C(   628.48) },
3823       {  INT32_C(           3), -INT32_C(         163), -INT32_C(         490),  INT32_C(         628) } },
3824     { { SIMDE_FLOAT32_C(   629.16), SIMDE_FLOAT32_C(   267.90), SIMDE_FLOAT32_C(   468.27), SIMDE_FLOAT32_C(   765.29) },
3825       {  INT32_C(         629),  INT32_C(         267),  INT32_C(         468),  INT32_C(         765) } },
3826     { { SIMDE_FLOAT32_C(  -532.39), SIMDE_FLOAT32_C(   448.09), SIMDE_FLOAT32_C(   543.36), SIMDE_FLOAT32_C(  -643.97) },
3827       { -INT32_C(         532),  INT32_C(         448),  INT32_C(         543), -INT32_C(         643) } },
3828     { { SIMDE_FLOAT32_C(  -958.61), SIMDE_FLOAT32_C(  -434.16), SIMDE_FLOAT32_C(   958.20), SIMDE_FLOAT32_C(   749.69) },
3829       { -INT32_C(         958), -INT32_C(         434),  INT32_C(         958),  INT32_C(         749) } },
3830     { { SIMDE_FLOAT32_C(   379.97), SIMDE_FLOAT32_C(  -697.16), SIMDE_FLOAT32_C(   790.54), SIMDE_FLOAT32_C(  -387.37) },
3831       {  INT32_C(         379), -INT32_C(         697),  INT32_C(         790), -INT32_C(         387) } },
3832     { { SIMDE_FLOAT32_C(  -785.26), SIMDE_FLOAT32_C(   403.54), SIMDE_FLOAT32_C(  -475.03), SIMDE_FLOAT32_C(  -577.41) },
3833       { -INT32_C(         785),  INT32_C(         403), -INT32_C(         475), -INT32_C(         577) } }
3834   };
3835 
3836   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3837     simde__m128 a = simde_mm_loadu_ps(test_vec[i].a);
3838     simde__m128i r = simde_mm_cvttps_epi32(a);
3839     simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
3840   }
3841 
3842   return 0;
3843 }
3844 
3845 static int
3846 test_simde_mm_cvttsd_si32 (SIMDE_MUNIT_TEST_ARGS) {
3847   static const struct {
3848     const simde_float64 a[2];
3849     const int32_t r;
3850   } test_vec[] = {
3851     #if !defined(SIMDE_FAST_NANS)
3852     { {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   248.78) },
3853                  INT32_MIN },
3854     { {            -SIMDE_MATH_NAN, SIMDE_FLOAT64_C(   139.38) },
3855                  INT32_MIN },
3856     #endif
3857     #if !defined(SIMDE_FAST_CONVERSION_RANGE)
3858     { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
3859         SIMDE_FLOAT64_C(  -850.89) },
3860       -INT32_C(  2147483648) },
3861     { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
3862         SIMDE_FLOAT64_C(   -30.56) },
3863       -INT32_C(  2147483648) },
3864     { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
3865         SIMDE_FLOAT64_C(  -742.09) },
3866        INT32_C(  2147483547) },
3867     { { HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100),
3868         SIMDE_FLOAT64_C(  -496.27) },
3869       -INT32_C(  2147483548) },
3870     #endif
3871     { { SIMDE_FLOAT64_C(   -57.42), SIMDE_FLOAT64_C(  -705.99) },
3872       -INT32_C(          57) },
3873     { { SIMDE_FLOAT64_C(   737.15), SIMDE_FLOAT64_C(  -394.42) },
3874        INT32_C(         737) },
3875     { { SIMDE_FLOAT64_C(  -193.78), SIMDE_FLOAT64_C(     0.85) },
3876       -INT32_C(         193) },
3877     { { SIMDE_FLOAT64_C(   -61.02), SIMDE_FLOAT64_C(   247.60) },
3878       -INT32_C(          61) },
3879     { { SIMDE_FLOAT64_C(   396.64), SIMDE_FLOAT64_C(   103.10) },
3880        INT32_C(         396) },
3881     { { SIMDE_FLOAT64_C(   606.36), SIMDE_FLOAT64_C(  -703.92) },
3882        INT32_C(         606) },
3883   };
3884 
3885   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
3886     simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
3887     int32_t r = simde_mm_cvttsd_si32(a);
3888     simde_assert_equal_i32(r, test_vec[i].r);
3889   }
3890 
3891   return 0;
3892 }
3893 
3894 
3895 static int
3896 test_simde_mm_cvttsd_si64(SIMDE_MUNIT_TEST_ARGS) {
3897   const struct {
3898     simde__m128d a;
3899     int64_t r;
3900   } test_vec[8] = {
3901     { simde_mm_set_pd(SIMDE_FLOAT64_C(-345.97), SIMDE_FLOAT64_C( 664.87)),  664 },
3902     { simde_mm_set_pd(SIMDE_FLOAT64_C( 648.27), SIMDE_FLOAT64_C(-390.19)), -390 },
3903     { simde_mm_set_pd(SIMDE_FLOAT64_C(-500.63), SIMDE_FLOAT64_C(-258.15)), -258 },
3904     { simde_mm_set_pd(SIMDE_FLOAT64_C(-109.22), SIMDE_FLOAT64_C(-784.27)), -784 },
3905     { simde_mm_set_pd(SIMDE_FLOAT64_C(-688.79), SIMDE_FLOAT64_C(-698.22)), -698 },
3906     { simde_mm_set_pd(SIMDE_FLOAT64_C(-914.25), SIMDE_FLOAT64_C(-650.88)), -650 },
3907     { simde_mm_set_pd(SIMDE_FLOAT64_C(-228.35), SIMDE_FLOAT64_C(-603.46)), -603 },
3908     { simde_mm_set_pd(SIMDE_FLOAT64_C(-556.94), SIMDE_FLOAT64_C( 694.64)),  694 }
3909   };
3910 
3911   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3912     int64_t r = simde_mm_cvttsd_si64(test_vec[i].a);
3913     simde_assert_equal_i64(r, test_vec[i].r);
3914   }
3915 
3916   return 0;
3917 }
3918 
3919 static int
3920 test_simde_mm_div_pd(SIMDE_MUNIT_TEST_ARGS) {
3921   const struct {
3922     simde__m128d a;
3923     simde__m128d b;
3924     simde__m128d r;
3925   } test_vec[8] = {
3926     { simde_mm_set_pd(SIMDE_FLOAT64_C( -184.40), SIMDE_FLOAT64_C(  992.45)),
3927       simde_mm_set_pd(SIMDE_FLOAT64_C(  155.72), SIMDE_FLOAT64_C(  856.52)),
3928       simde_mm_set_pd(SIMDE_FLOAT64_C(   -1.18), SIMDE_FLOAT64_C(    1.16)) },
3929     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -34.37), SIMDE_FLOAT64_C(  596.67)),
3930       simde_mm_set_pd(SIMDE_FLOAT64_C( -718.99), SIMDE_FLOAT64_C(  -17.98)),
3931       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.05), SIMDE_FLOAT64_C(  -33.19)) },
3932     { simde_mm_set_pd(SIMDE_FLOAT64_C(  797.64), SIMDE_FLOAT64_C(  669.98)),
3933       simde_mm_set_pd(SIMDE_FLOAT64_C( -872.55), SIMDE_FLOAT64_C(  857.06)),
3934       simde_mm_set_pd(SIMDE_FLOAT64_C(   -0.91), SIMDE_FLOAT64_C(    0.78)) },
3935     { simde_mm_set_pd(SIMDE_FLOAT64_C( -864.62), SIMDE_FLOAT64_C(  635.60)),
3936       simde_mm_set_pd(SIMDE_FLOAT64_C( -556.59), SIMDE_FLOAT64_C(  676.91)),
3937       simde_mm_set_pd(SIMDE_FLOAT64_C(    1.55), SIMDE_FLOAT64_C(    0.94)) },
3938     { simde_mm_set_pd(SIMDE_FLOAT64_C( -635.32), SIMDE_FLOAT64_C(  518.94)),
3939       simde_mm_set_pd(SIMDE_FLOAT64_C( -426.58), SIMDE_FLOAT64_C( -331.30)),
3940       simde_mm_set_pd(SIMDE_FLOAT64_C(    1.49), SIMDE_FLOAT64_C(   -1.57)) },
3941     { simde_mm_set_pd(SIMDE_FLOAT64_C( -494.90), SIMDE_FLOAT64_C(  -42.04)),
3942       simde_mm_set_pd(SIMDE_FLOAT64_C(  432.98), SIMDE_FLOAT64_C(  277.38)),
3943       simde_mm_set_pd(SIMDE_FLOAT64_C(   -1.14), SIMDE_FLOAT64_C(   -0.15)) },
3944     { simde_mm_set_pd(SIMDE_FLOAT64_C( -530.41), SIMDE_FLOAT64_C(  112.07)),
3945       simde_mm_set_pd(SIMDE_FLOAT64_C( -914.02), SIMDE_FLOAT64_C(   90.15)),
3946       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.58), SIMDE_FLOAT64_C(    1.24)) },
3947     { simde_mm_set_pd(SIMDE_FLOAT64_C( -176.73), SIMDE_FLOAT64_C( -245.01)),
3948       simde_mm_set_pd(SIMDE_FLOAT64_C(  315.38), SIMDE_FLOAT64_C( -747.83)),
3949       simde_mm_set_pd(SIMDE_FLOAT64_C(   -0.56), SIMDE_FLOAT64_C(    0.33)) }
3950   };
3951 
3952   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3953     simde__m128d r = simde_mm_div_pd(test_vec[i].a, test_vec[i].b);
3954     simde_assert_m128d_close(r, test_vec[i].r, 1);
3955   }
3956 
3957   return 0;
3958 }
3959 
3960 static int
3961 test_simde_mm_div_sd(SIMDE_MUNIT_TEST_ARGS) {
3962   const struct {
3963     simde__m128d a;
3964     simde__m128d b;
3965     simde__m128d r;
3966   } test_vec[8] = {
3967     { simde_mm_set_pd(SIMDE_FLOAT64_C( -164.12), SIMDE_FLOAT64_C( -192.56)),
3968       simde_mm_set_pd(SIMDE_FLOAT64_C( -917.87), SIMDE_FLOAT64_C(  429.05)),
3969       simde_mm_set_pd(SIMDE_FLOAT64_C( -164.12), SIMDE_FLOAT64_C(   -0.45)) },
3970     { simde_mm_set_pd(SIMDE_FLOAT64_C(  658.48), SIMDE_FLOAT64_C(  164.94)),
3971       simde_mm_set_pd(SIMDE_FLOAT64_C(  -29.79), SIMDE_FLOAT64_C(  356.73)),
3972       simde_mm_set_pd(SIMDE_FLOAT64_C(  658.48), SIMDE_FLOAT64_C(    0.46)) },
3973     { simde_mm_set_pd(SIMDE_FLOAT64_C( -450.41), SIMDE_FLOAT64_C(  587.65)),
3974       simde_mm_set_pd(SIMDE_FLOAT64_C(  553.54), SIMDE_FLOAT64_C( -684.22)),
3975       simde_mm_set_pd(SIMDE_FLOAT64_C( -450.41), SIMDE_FLOAT64_C(   -0.86)) },
3976     { simde_mm_set_pd(SIMDE_FLOAT64_C( -742.91), SIMDE_FLOAT64_C( -966.41)),
3977       simde_mm_set_pd(SIMDE_FLOAT64_C(  180.37), SIMDE_FLOAT64_C(  175.93)),
3978       simde_mm_set_pd(SIMDE_FLOAT64_C( -742.91), SIMDE_FLOAT64_C(   -5.49)) },
3979     { simde_mm_set_pd(SIMDE_FLOAT64_C( -280.67), SIMDE_FLOAT64_C(  415.67)),
3980       simde_mm_set_pd(SIMDE_FLOAT64_C(  495.86), SIMDE_FLOAT64_C( -819.23)),
3981       simde_mm_set_pd(SIMDE_FLOAT64_C( -280.67), SIMDE_FLOAT64_C(   -0.51)) },
3982     { simde_mm_set_pd(SIMDE_FLOAT64_C(  391.69), SIMDE_FLOAT64_C(  589.87)),
3983       simde_mm_set_pd(SIMDE_FLOAT64_C( -651.02), SIMDE_FLOAT64_C( -239.35)),
3984       simde_mm_set_pd(SIMDE_FLOAT64_C(  391.69), SIMDE_FLOAT64_C(   -2.46)) },
3985     { simde_mm_set_pd(SIMDE_FLOAT64_C(  423.52), SIMDE_FLOAT64_C(  419.28)),
3986       simde_mm_set_pd(SIMDE_FLOAT64_C(  336.50), SIMDE_FLOAT64_C(  431.02)),
3987       simde_mm_set_pd(SIMDE_FLOAT64_C(  423.52), SIMDE_FLOAT64_C(    0.97)) },
3988     { simde_mm_set_pd(SIMDE_FLOAT64_C(  235.17), SIMDE_FLOAT64_C( -333.81)),
3989       simde_mm_set_pd(SIMDE_FLOAT64_C( -715.21), SIMDE_FLOAT64_C(    4.82)),
3990       simde_mm_set_pd(SIMDE_FLOAT64_C(  235.17), SIMDE_FLOAT64_C(  -69.26)) }
3991   };
3992 
3993   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
3994     simde__m128d r = simde_mm_div_sd(test_vec[i].a, test_vec[i].b);
3995     simde_assert_m128d_close(r, test_vec[i].r, 1);
3996   }
3997 
3998   return 0;
3999 }
4000 
4001 static int
4002 test_simde_mm_extract_epi16(SIMDE_MUNIT_TEST_ARGS) {
4003   const struct {
4004     simde__m128i a;
4005     int32_t r;
4006   } test_vec[8] = {
4007     { simde_mm_set_epi16(INT16_C(-22888), INT16_C(-26241), INT16_C( 16094), INT16_C( 25365),
4008                          INT16_C(-10975), INT16_C( -4323), INT16_C(  9478), INT16_C(-18966)), 54561 },
4009     { simde_mm_set_epi16(INT16_C( -4494), INT16_C(-23544), INT16_C( 12313), INT16_C( 19220),
4010                          INT16_C( 16921), INT16_C(  9248), INT16_C( -1076), INT16_C(-18617)), 16921 },
4011     { simde_mm_set_epi16(INT16_C(  5051), INT16_C( 30913), INT16_C( 18404), INT16_C(-11820),
4012                          INT16_C( 16495), INT16_C( 32647), INT16_C( 21150), INT16_C( 16664)), 16495 },
4013     { simde_mm_set_epi16(INT16_C(   987), INT16_C( 32176), INT16_C(-17758), INT16_C( 21096),
4014                          INT16_C(  -945), INT16_C(  5537), INT16_C(  5495), INT16_C(-18130)), 64591 },
4015     { simde_mm_set_epi16(INT16_C(-16046), INT16_C( 13714), INT16_C( 12272), INT16_C( 32151),
4016                          INT16_C(-14156), INT16_C(  8851), INT16_C(-19624), INT16_C( -2653)), 51380 },
4017     { simde_mm_set_epi16(INT16_C(-28172), INT16_C(  1666), INT16_C( 15569), INT16_C( -1622),
4018                          INT16_C( 22048), INT16_C(-24364), INT16_C(  2478), INT16_C( 20826)), 22048 },
4019     { simde_mm_set_epi16(INT16_C(-29653), INT16_C(-27750), INT16_C(  5027), INT16_C( -7816),
4020                          INT16_C(-20852), INT16_C(  3178), INT16_C(-27881), INT16_C(  3156)), 44684 },
4021     { simde_mm_set_epi16(INT16_C(-26280), INT16_C( 27067), INT16_C( 10815), INT16_C(-30178),
4022                          INT16_C(-26852), INT16_C( 26399), INT16_C(-30202), INT16_C(-11030)), 38684 }
4023   };
4024 
4025   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4026     int32_t r = simde_mm_extract_epi16(test_vec[i].a, 3);
4027     simde_assert_equal_i32(r, test_vec[i].r);
4028   }
4029 
4030   return 0;
4031 }
4032 
4033 static int
4034 test_simde_mm_insert_epi16(SIMDE_MUNIT_TEST_ARGS) {
4035   const struct {
4036     simde__m128i a;
4037     int16_t b;
4038     simde__m128i r;
4039   } test_vec[8] = {
4040     { simde_mm_set_epi16(INT16_C(-18659), INT16_C( 19491), INT16_C(-30434), INT16_C( -6059),
4041                          INT16_C( 11985), INT16_C(  5369), INT16_C(-14188), INT16_C(  9668)),
4042        21712,
4043       simde_mm_set_epi16(INT16_C(-18659), INT16_C( 19491), INT16_C(-30434), INT16_C( -6059),
4044                          INT16_C( 21712), INT16_C(  5369), INT16_C(-14188), INT16_C(  9668)) },
4045     { simde_mm_set_epi16(INT16_C( 32684), INT16_C(-21716), INT16_C(  7657), INT16_C(  3627),
4046                          INT16_C( 12377), INT16_C( 30609), INT16_C(-12611), INT16_C(-11955)),
4047       -27473,
4048       simde_mm_set_epi16(INT16_C( 32684), INT16_C(-21716), INT16_C(  7657), INT16_C(  3627),
4049                          INT16_C(-27473), INT16_C( 30609), INT16_C(-12611), INT16_C(-11955)) },
4050     { simde_mm_set_epi16(INT16_C(-18344), INT16_C( -4896), INT16_C(-19094), INT16_C(  -638),
4051                          INT16_C(-30376), INT16_C(-17556), INT16_C(-31358), INT16_C(-17530)),
4052       -19116,
4053       simde_mm_set_epi16(INT16_C(-18344), INT16_C( -4896), INT16_C(-19094), INT16_C(  -638),
4054                          INT16_C(-19116), INT16_C(-17556), INT16_C(-31358), INT16_C(-17530)) },
4055     { simde_mm_set_epi16(INT16_C(-11121), INT16_C( 29288), INT16_C( -3915), INT16_C( 13306),
4056                          INT16_C( 30582), INT16_C(  4374), INT16_C( -9323), INT16_C( -2317)),
4057         5778,
4058       simde_mm_set_epi16(INT16_C(-11121), INT16_C( 29288), INT16_C( -3915), INT16_C( 13306),
4059                          INT16_C(  5778), INT16_C(  4374), INT16_C( -9323), INT16_C( -2317)) },
4060     { simde_mm_set_epi16(INT16_C(  7542), INT16_C(-16196), INT16_C(-24612), INT16_C(  8929),
4061                          INT16_C(-16460), INT16_C( 17259), INT16_C(   672), INT16_C(-18076)),
4062         -411,
4063       simde_mm_set_epi16(INT16_C(  7542), INT16_C(-16196), INT16_C(-24612), INT16_C(  8929),
4064                          INT16_C(  -411), INT16_C( 17259), INT16_C(   672), INT16_C(-18076)) },
4065     { simde_mm_set_epi16(INT16_C( 15913), INT16_C(-18873), INT16_C( 14978), INT16_C( 31946),
4066                          INT16_C( -6939), INT16_C( 26150), INT16_C( 18499), INT16_C(-16752)),
4067         1141,
4068       simde_mm_set_epi16(INT16_C( 15913), INT16_C(-18873), INT16_C( 14978), INT16_C( 31946),
4069                          INT16_C(  1141), INT16_C( 26150), INT16_C( 18499), INT16_C(-16752)) },
4070     { simde_mm_set_epi16(INT16_C(  1093), INT16_C( -6101), INT16_C(-30747), INT16_C(-18266),
4071                          INT16_C(  4085), INT16_C(-14478), INT16_C( -6279), INT16_C(-25531)),
4072       -18605,
4073       simde_mm_set_epi16(INT16_C(  1093), INT16_C( -6101), INT16_C(-30747), INT16_C(-18266),
4074                          INT16_C(-18605), INT16_C(-14478), INT16_C( -6279), INT16_C(-25531)) },
4075     { simde_mm_set_epi16(INT16_C( -2463), INT16_C( -3389), INT16_C( 28311), INT16_C( -5667),
4076                          INT16_C( 24886), INT16_C( 24368), INT16_C( 19484), INT16_C(-11581)),
4077       -17420,
4078       simde_mm_set_epi16(INT16_C( -2463), INT16_C( -3389), INT16_C( 28311), INT16_C( -5667),
4079                          INT16_C(-17420), INT16_C( 24368), INT16_C( 19484), INT16_C(-11581)) }
4080   };
4081 
4082   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4083     simde__m128i r = simde_mm_insert_epi16(test_vec[i].a, test_vec[i].b, 3);
4084     simde_assert_m128i_i16(r, ==, test_vec[i].r);
4085   }
4086 
4087   return 0;
4088 }
4089 
4090 static int
4091 test_simde_mm_load_pd (SIMDE_MUNIT_TEST_ARGS) {
4092   static const struct {
4093     const SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 a[2];
4094     const simde_float64 r[2];
4095   } test_vec[] = {
4096     { { SIMDE_FLOAT64_C(   319.94), SIMDE_FLOAT64_C(   593.75) },
4097       { SIMDE_FLOAT64_C(   319.94), SIMDE_FLOAT64_C(   593.75) } },
4098     { { SIMDE_FLOAT64_C(  -220.38), SIMDE_FLOAT64_C(   646.62) },
4099       { SIMDE_FLOAT64_C(  -220.38), SIMDE_FLOAT64_C(   646.62) } },
4100     { { SIMDE_FLOAT64_C(   769.82), SIMDE_FLOAT64_C(   960.64) },
4101       { SIMDE_FLOAT64_C(   769.82), SIMDE_FLOAT64_C(   960.64) } },
4102     { { SIMDE_FLOAT64_C(  -283.67), SIMDE_FLOAT64_C(  -795.53) },
4103       { SIMDE_FLOAT64_C(  -283.67), SIMDE_FLOAT64_C(  -795.53) } },
4104     { { SIMDE_FLOAT64_C(  -643.22), SIMDE_FLOAT64_C(   246.89) },
4105       { SIMDE_FLOAT64_C(  -643.22), SIMDE_FLOAT64_C(   246.89) } },
4106     { { SIMDE_FLOAT64_C(  -842.54), SIMDE_FLOAT64_C(  -513.83) },
4107       { SIMDE_FLOAT64_C(  -842.54), SIMDE_FLOAT64_C(  -513.83) } },
4108     { { SIMDE_FLOAT64_C(   635.37), SIMDE_FLOAT64_C(   836.97) },
4109       { SIMDE_FLOAT64_C(   635.37), SIMDE_FLOAT64_C(   836.97) } },
4110     { { SIMDE_FLOAT64_C(   838.72), SIMDE_FLOAT64_C(  -197.92) },
4111       { SIMDE_FLOAT64_C(   838.72), SIMDE_FLOAT64_C(  -197.92) } }
4112   };
4113 
4114   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
4115     simde_test_x86_assert_equal_f64x2(simde_mm_load_pd(test_vec[i].r), simde_mm_loadu_pd(test_vec[i].r), 1);
4116   }
4117 
4118   return 0;
4119 }
4120 
4121 static int
4122 test_simde_mm_load_pd1(SIMDE_MUNIT_TEST_ARGS) {
4123   const struct {
4124     simde_float64 a;
4125     simde__m128d r;
4126   } test_vec[8] = {
4127     {SIMDE_FLOAT64_C( -639.28), simde_mm_set_pd(SIMDE_FLOAT64_C(-639.28), SIMDE_FLOAT64_C(-639.28)) },
4128     {SIMDE_FLOAT64_C(  754.31), simde_mm_set_pd(SIMDE_FLOAT64_C( 754.31), SIMDE_FLOAT64_C( 754.31)) },
4129     {SIMDE_FLOAT64_C( -143.09), simde_mm_set_pd(SIMDE_FLOAT64_C(-143.09), SIMDE_FLOAT64_C(-143.09)) },
4130     {SIMDE_FLOAT64_C( -509.95), simde_mm_set_pd(SIMDE_FLOAT64_C(-509.95), SIMDE_FLOAT64_C(-509.95)) },
4131     {SIMDE_FLOAT64_C(  357.11), simde_mm_set_pd(SIMDE_FLOAT64_C( 357.11), SIMDE_FLOAT64_C( 357.11)) },
4132     {SIMDE_FLOAT64_C(  414.83), simde_mm_set_pd(SIMDE_FLOAT64_C( 414.83), SIMDE_FLOAT64_C( 414.83)) },
4133     {SIMDE_FLOAT64_C(  416.46), simde_mm_set_pd(SIMDE_FLOAT64_C( 416.46), SIMDE_FLOAT64_C( 416.46)) },
4134     {SIMDE_FLOAT64_C(  167.42), simde_mm_set_pd(SIMDE_FLOAT64_C( 167.42), SIMDE_FLOAT64_C( 167.42)) }
4135   };
4136 
4137   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4138     simde__m128d r = simde_mm_load_pd1(&test_vec[i].a);
4139     simde_assert_m128d_close(r, test_vec[i].r, 1);
4140   }
4141 
4142   return 0;
4143 }
4144 
4145 static int
4146 test_simde_mm_load_sd(SIMDE_MUNIT_TEST_ARGS) {
4147   const struct {
4148     simde_float64 a;
4149     simde__m128d r;
4150   } test_vec[8] = {
4151     {SIMDE_FLOAT64_C(  883.59), simde_mm_set_pd(SIMDE_FLOAT64_C(   0.00), SIMDE_FLOAT64_C( 883.59)) },
4152     {SIMDE_FLOAT64_C(  719.08), simde_mm_set_pd(SIMDE_FLOAT64_C(   0.00), SIMDE_FLOAT64_C( 719.08)) },
4153     {SIMDE_FLOAT64_C(  -82.94), simde_mm_set_pd(SIMDE_FLOAT64_C(   0.00), SIMDE_FLOAT64_C( -82.94)) },
4154     {SIMDE_FLOAT64_C(  -87.79), simde_mm_set_pd(SIMDE_FLOAT64_C(   0.00), SIMDE_FLOAT64_C( -87.79)) },
4155     {SIMDE_FLOAT64_C(  309.31), simde_mm_set_pd(SIMDE_FLOAT64_C(   0.00), SIMDE_FLOAT64_C( 309.31)) },
4156     {SIMDE_FLOAT64_C( -987.67), simde_mm_set_pd(SIMDE_FLOAT64_C(   0.00), SIMDE_FLOAT64_C(-987.67)) },
4157     {SIMDE_FLOAT64_C(  196.18), simde_mm_set_pd(SIMDE_FLOAT64_C(   0.00), SIMDE_FLOAT64_C( 196.18)) },
4158     {SIMDE_FLOAT64_C(  313.82), simde_mm_set_pd(SIMDE_FLOAT64_C(   0.00), SIMDE_FLOAT64_C( 313.82)) }
4159   };
4160 
4161   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4162     simde__m128d r = simde_mm_load_sd(&test_vec[i].a);
4163     simde_assert_m128d_close(r, test_vec[i].r, 1);
4164   }
4165 
4166   return 0;
4167 }
4168 
4169 static int
4170 test_simde_mm_load_si128(SIMDE_MUNIT_TEST_ARGS) {
4171   const struct {
4172     simde__m128i a;
4173     simde__m128i r;
4174   } test_vec[8] = {
4175     { simde_mm_set_epi32(INT32_C(-1485513264), INT32_C( 2130924320), INT32_C( 1226074611), INT32_C( -306486659)),
4176       simde_mm_set_epi32(INT32_C(-1485513264), INT32_C( 2130924320), INT32_C( 1226074611), INT32_C( -306486659)) },
4177     { simde_mm_set_epi32(INT32_C(  952258085), INT32_C( -534886765), INT32_C( -354984724), INT32_C(-1450427500)),
4178       simde_mm_set_epi32(INT32_C(  952258085), INT32_C( -534886765), INT32_C( -354984724), INT32_C(-1450427500)) },
4179     { simde_mm_set_epi32(INT32_C(-1417585996), INT32_C(  546041970), INT32_C(-1469146664), INT32_C(-2062567602)),
4180       simde_mm_set_epi32(INT32_C(-1417585996), INT32_C(  546041970), INT32_C(-1469146664), INT32_C(-2062567602)) },
4181     { simde_mm_set_epi32(INT32_C(-1482230799), INT32_C(-1421432180), INT32_C(-1588201284), INT32_C(-1267673212)),
4182       simde_mm_set_epi32(INT32_C(-1482230799), INT32_C(-1421432180), INT32_C(-1588201284), INT32_C(-1267673212)) },
4183     { simde_mm_set_epi32(INT32_C(-1153380991), INT32_C( 1838203743), INT32_C( -180063833), INT32_C( -699223421)),
4184       simde_mm_set_epi32(INT32_C(-1153380991), INT32_C( 1838203743), INT32_C( -180063833), INT32_C( -699223421)) },
4185     { simde_mm_set_epi32(INT32_C( -711752348), INT32_C(  464328511), INT32_C( 1773807699), INT32_C(  849844772)),
4186       simde_mm_set_epi32(INT32_C( -711752348), INT32_C(  464328511), INT32_C( 1773807699), INT32_C(  849844772)) },
4187     { simde_mm_set_epi32(INT32_C(-1083662155), INT32_C( -641783129), INT32_C(-1893537704), INT32_C( 1971283674)),
4188       simde_mm_set_epi32(INT32_C(-1083662155), INT32_C( -641783129), INT32_C(-1893537704), INT32_C( 1971283674)) },
4189     { simde_mm_set_epi32(INT32_C(-1329936037), INT32_C( -364329957), INT32_C(-1886427840), INT32_C(-1935682760)),
4190       simde_mm_set_epi32(INT32_C(-1329936037), INT32_C( -364329957), INT32_C(-1886427840), INT32_C(-1935682760)) }
4191   };
4192 
4193   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4194     simde__m128i r = simde_mm_load_si128(&test_vec[i].a);
4195     simde_assert_m128i_i32(r, ==, test_vec[i].r);
4196   }
4197 
4198   return 0;
4199 }
4200 
4201 static int
4202 test_simde_mm_loadh_pd(SIMDE_MUNIT_TEST_ARGS) {
4203   const struct {
4204     simde__m128d a;
4205     simde_float64 b;
4206     simde__m128d r;
4207   } test_vec[8] = {
4208     { simde_mm_set_pd(SIMDE_FLOAT64_C( -625.39), SIMDE_FLOAT64_C( -212.79)),
4209 SIMDE_FLOAT64_C(       -544.03),
4210       simde_mm_set_pd(SIMDE_FLOAT64_C( -544.03), SIMDE_FLOAT64_C( -212.79)) },
4211     { simde_mm_set_pd(SIMDE_FLOAT64_C(  906.58), SIMDE_FLOAT64_C( -446.43)),
4212 SIMDE_FLOAT64_C(       -955.62),
4213       simde_mm_set_pd(SIMDE_FLOAT64_C( -955.62), SIMDE_FLOAT64_C( -446.43)) },
4214     { simde_mm_set_pd(SIMDE_FLOAT64_C(  575.38), SIMDE_FLOAT64_C( -468.21)),
4215 SIMDE_FLOAT64_C(       -790.22),
4216       simde_mm_set_pd(SIMDE_FLOAT64_C( -790.22), SIMDE_FLOAT64_C( -468.21)) },
4217     { simde_mm_set_pd(SIMDE_FLOAT64_C(  375.10), SIMDE_FLOAT64_C( -731.74)),
4218 SIMDE_FLOAT64_C(        857.52),
4219       simde_mm_set_pd(SIMDE_FLOAT64_C(  857.52), SIMDE_FLOAT64_C( -731.74)) },
4220     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -48.07), SIMDE_FLOAT64_C( -201.78)),
4221 SIMDE_FLOAT64_C(       -122.99),
4222       simde_mm_set_pd(SIMDE_FLOAT64_C( -122.99), SIMDE_FLOAT64_C( -201.78)) },
4223     { simde_mm_set_pd(SIMDE_FLOAT64_C(   97.66), SIMDE_FLOAT64_C( -743.76)),
4224 SIMDE_FLOAT64_C(        123.61),
4225       simde_mm_set_pd(SIMDE_FLOAT64_C(  123.61), SIMDE_FLOAT64_C( -743.76)) },
4226     { simde_mm_set_pd(SIMDE_FLOAT64_C(  622.43), SIMDE_FLOAT64_C( -815.78)),
4227 SIMDE_FLOAT64_C(       -884.62),
4228       simde_mm_set_pd(SIMDE_FLOAT64_C( -884.62), SIMDE_FLOAT64_C( -815.78)) },
4229     { simde_mm_set_pd(SIMDE_FLOAT64_C(  233.02), SIMDE_FLOAT64_C(  337.90)),
4230 SIMDE_FLOAT64_C(        566.08),
4231       simde_mm_set_pd(SIMDE_FLOAT64_C(  566.08), SIMDE_FLOAT64_C(  337.90)) }
4232   };
4233 
4234   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4235     simde__m128d r = simde_mm_loadh_pd(test_vec[i].a, &test_vec[i].b);
4236     simde_assert_m128d_close(r, test_vec[i].r, 4);
4237   }
4238 
4239   return 0;
4240 }
4241 
4242 static int
4243 test_simde_mm_loadl_epi64 (SIMDE_MUNIT_TEST_ARGS) {
4244   static const struct {
4245     const int64_t a;
4246     const int64_t r[2];
4247   } test_vec[] = {
4248     { -INT64_C( 5374209034103506743),
4249       { -INT64_C( 5374209034103506743),  INT64_C(0) } },
4250     { -INT64_C( 8818261387786582106),
4251       { -INT64_C( 8818261387786582106),  INT64_C(0) } },
4252     {  INT64_C( 8778417490344874118),
4253       {  INT64_C( 8778417490344874118),  INT64_C(0) } },
4254     {  INT64_C( 1040805703196854697),
4255       {  INT64_C( 1040805703196854697),  INT64_C(0) } },
4256     { -INT64_C( 6883770744639848089),
4257       { -INT64_C( 6883770744639848089),  INT64_C(0) } },
4258     { -INT64_C( 6091281060752135947),
4259       { -INT64_C( 6091281060752135947),  INT64_C(0) } },
4260     {  INT64_C( 7649374694561713533),
4261       {  INT64_C( 7649374694561713533),  INT64_C(0) } },
4262     {  INT64_C( 9018079017176557522),
4263       {  INT64_C( 9018079017176557522),  INT64_C(0) } }
4264   };
4265 
4266   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
4267     simde__m128i r = simde_mm_loadl_epi64(SIMDE_ALIGN_CAST(simde__m128i const*, &test_vec[i].a));
4268     simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r));
4269   }
4270 
4271   return 0;
4272 }
4273 
4274 static int
4275 test_simde_mm_loadl_pd(SIMDE_MUNIT_TEST_ARGS) {
4276   const struct {
4277     simde__m128d a;
4278     simde_float64 b;
4279     simde__m128d r;
4280   } test_vec[8] = {
4281     { simde_mm_set_pd(SIMDE_FLOAT64_C(  398.25), SIMDE_FLOAT64_C(  169.44)),
4282 SIMDE_FLOAT64_C(        512.14),
4283       simde_mm_set_pd(SIMDE_FLOAT64_C(  398.25), SIMDE_FLOAT64_C(  512.14)) },
4284     { simde_mm_set_pd(SIMDE_FLOAT64_C(  747.05), SIMDE_FLOAT64_C(  122.36)),
4285 SIMDE_FLOAT64_C(       -219.24),
4286       simde_mm_set_pd(SIMDE_FLOAT64_C(  747.05), SIMDE_FLOAT64_C( -219.24)) },
4287     { simde_mm_set_pd(SIMDE_FLOAT64_C(  806.68), SIMDE_FLOAT64_C(  439.45)),
4288 SIMDE_FLOAT64_C(        545.31),
4289       simde_mm_set_pd(SIMDE_FLOAT64_C(  806.68), SIMDE_FLOAT64_C(  545.31)) },
4290     { simde_mm_set_pd(SIMDE_FLOAT64_C( -494.45), SIMDE_FLOAT64_C(  273.54)),
4291 SIMDE_FLOAT64_C(        233.72),
4292       simde_mm_set_pd(SIMDE_FLOAT64_C( -494.45), SIMDE_FLOAT64_C(  233.72)) },
4293     { simde_mm_set_pd(SIMDE_FLOAT64_C( -435.22), SIMDE_FLOAT64_C( -790.14)),
4294 SIMDE_FLOAT64_C(        334.56),
4295       simde_mm_set_pd(SIMDE_FLOAT64_C( -435.22), SIMDE_FLOAT64_C(  334.56)) },
4296     { simde_mm_set_pd(SIMDE_FLOAT64_C( -387.92), SIMDE_FLOAT64_C(  587.13)),
4297 SIMDE_FLOAT64_C(        782.99),
4298       simde_mm_set_pd(SIMDE_FLOAT64_C( -387.92), SIMDE_FLOAT64_C(  782.99)) },
4299     { simde_mm_set_pd(SIMDE_FLOAT64_C( -656.10), SIMDE_FLOAT64_C( -868.90)),
4300 SIMDE_FLOAT64_C(       -241.17),
4301       simde_mm_set_pd(SIMDE_FLOAT64_C( -656.10), SIMDE_FLOAT64_C( -241.17)) },
4302     { simde_mm_set_pd(SIMDE_FLOAT64_C(  155.11), SIMDE_FLOAT64_C( -412.38)),
4303 SIMDE_FLOAT64_C(        606.64),
4304       simde_mm_set_pd(SIMDE_FLOAT64_C(  155.11), SIMDE_FLOAT64_C(  606.64)) }
4305   };
4306 
4307   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4308     simde__m128d r = simde_mm_loadl_pd(test_vec[i].a, &test_vec[i].b);
4309     simde_assert_m128d_close(r, test_vec[i].r, 4);
4310   }
4311 
4312   return 0;
4313 }
4314 
4315 static int
4316 test_simde_mm_loadr_pd(SIMDE_MUNIT_TEST_ARGS) {
4317   const struct {
4318     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 mem_addr[2];
4319     simde__m128d r;
4320   } test_vec[8] = {
4321     { {SIMDE_FLOAT64_C(   808.22), SIMDE_FLOAT64_C( -538.55) },
4322       simde_mm_set_pd(SIMDE_FLOAT64_C( 808.22), SIMDE_FLOAT64_C(-538.55)) },
4323     { {SIMDE_FLOAT64_C(   475.76), SIMDE_FLOAT64_C(  878.69) },
4324       simde_mm_set_pd(SIMDE_FLOAT64_C( 475.76), SIMDE_FLOAT64_C( 878.69)) },
4325     { {SIMDE_FLOAT64_C(  -400.00), SIMDE_FLOAT64_C( -135.07) },
4326       simde_mm_set_pd(SIMDE_FLOAT64_C(-400.00), SIMDE_FLOAT64_C(-135.07)) },
4327     { {SIMDE_FLOAT64_C(   -32.33), SIMDE_FLOAT64_C( -148.19) },
4328       simde_mm_set_pd(SIMDE_FLOAT64_C( -32.33), SIMDE_FLOAT64_C(-148.19)) },
4329     { {SIMDE_FLOAT64_C(  -971.23), SIMDE_FLOAT64_C( -835.90) },
4330       simde_mm_set_pd(SIMDE_FLOAT64_C(-971.23), SIMDE_FLOAT64_C(-835.90)) },
4331     { {SIMDE_FLOAT64_C(  -891.74), SIMDE_FLOAT64_C( -424.87) },
4332       simde_mm_set_pd(SIMDE_FLOAT64_C(-891.74), SIMDE_FLOAT64_C(-424.87)) },
4333     { {SIMDE_FLOAT64_C(  -199.77), SIMDE_FLOAT64_C(  631.45) },
4334       simde_mm_set_pd(SIMDE_FLOAT64_C(-199.77), SIMDE_FLOAT64_C( 631.45)) },
4335     { {SIMDE_FLOAT64_C(   410.30), SIMDE_FLOAT64_C(  721.68) },
4336       simde_mm_set_pd(SIMDE_FLOAT64_C( 410.30), SIMDE_FLOAT64_C( 721.68)) }
4337   };
4338 
4339   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4340     simde__m128d r = simde_mm_loadr_pd(test_vec[i].mem_addr);
4341     simde_assert_m128d_close(r, test_vec[i].r, 1);
4342   }
4343 
4344   return 0;
4345 }
4346 
4347 static int
4348 test_simde_mm_loadu_pd(SIMDE_MUNIT_TEST_ARGS) {
4349   const struct {
4350     simde_float64 mem_addr[2];
4351     simde__m128d r;
4352   } test_vec[8] = {
4353     { {SIMDE_FLOAT64_C(  -578.02), SIMDE_FLOAT64_C(   20.66) },
4354       simde_mm_set_pd(SIMDE_FLOAT64_C(  20.66), SIMDE_FLOAT64_C(-578.02)) },
4355     { {SIMDE_FLOAT64_C(   370.06), SIMDE_FLOAT64_C( -720.89) },
4356       simde_mm_set_pd(SIMDE_FLOAT64_C(-720.89), SIMDE_FLOAT64_C( 370.06)) },
4357     { {SIMDE_FLOAT64_C(   584.38), SIMDE_FLOAT64_C( -849.44) },
4358       simde_mm_set_pd(SIMDE_FLOAT64_C(-849.44), SIMDE_FLOAT64_C( 584.38)) },
4359     { {SIMDE_FLOAT64_C(   636.90), SIMDE_FLOAT64_C(  349.95) },
4360       simde_mm_set_pd(SIMDE_FLOAT64_C( 349.95), SIMDE_FLOAT64_C( 636.90)) },
4361     { {SIMDE_FLOAT64_C(  -617.52), SIMDE_FLOAT64_C(  599.47) },
4362       simde_mm_set_pd(SIMDE_FLOAT64_C( 599.47), SIMDE_FLOAT64_C(-617.52)) },
4363     { {SIMDE_FLOAT64_C(   633.70), SIMDE_FLOAT64_C(   30.57) },
4364       simde_mm_set_pd(SIMDE_FLOAT64_C(  30.57), SIMDE_FLOAT64_C( 633.70)) },
4365     { {SIMDE_FLOAT64_C(  -333.40), SIMDE_FLOAT64_C(  592.38) },
4366       simde_mm_set_pd(SIMDE_FLOAT64_C( 592.38), SIMDE_FLOAT64_C(-333.40)) },
4367     { {SIMDE_FLOAT64_C(  -335.86), SIMDE_FLOAT64_C(  212.26) },
4368       simde_mm_set_pd(SIMDE_FLOAT64_C( 212.26), SIMDE_FLOAT64_C(-335.86)) }
4369   };
4370 
4371   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4372     simde__m128d r = simde_mm_loadu_pd(test_vec[i].mem_addr);
4373     simde_assert_m128d_close(r, test_vec[i].r, 1);
4374   }
4375 
4376   return 0;
4377 }
4378 
4379 static int
4380 test_simde_mm_loadu_si128 (SIMDE_MUNIT_TEST_ARGS) {
4381   #if !defined(__clang__) || !defined(SIMDE_ARCH_ARM)
4382   static const struct {
4383     const int32_t a[4];
4384   } test_vec[] = {
4385     { { -INT32_C(   431648212),  INT32_C(   784010008), -INT32_C(  1621020084),  INT32_C(  1563595920) } },
4386     { { -INT32_C(  2020743978),  INT32_C(   642031476), -INT32_C(  1168838661), -INT32_C(    71485745) } },
4387     { { -INT32_C(   505281848),  INT32_C(  1510972686),  INT32_C(  1626960080), -INT32_C(   608359675) } },
4388     { { -INT32_C(  2073933297), -INT32_C(   441800983),  INT32_C(  1688206997), -INT32_C(    44016587) } },
4389     { {  INT32_C(  1843282527), -INT32_C(  1345851937),  INT32_C(  1661976670), -INT32_C(    79770388) } },
4390     { {  INT32_C(     8364054), -INT32_C(   605738426),  INT32_C(  1564443688),  INT32_C(  1079746529) } },
4391     { { -INT32_C(  1045612063), -INT32_C(  2056227801),  INT32_C(   552108084),  INT32_C(  1662789196) } },
4392     { {  INT32_C(   207854534), -INT32_C(   286832443), -INT32_C(  1387583796), -INT32_C(  1477597498) } }
4393   };
4394 
4395   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
4396     int32_t r[4];
4397     simde_mm_storeu_si128(SIMDE_ALIGN_CAST(simde__m128i*, r), simde_mm_loadu_si128(SIMDE_ALIGN_CAST(const simde__m128i*, test_vec[i].a)));
4398     simde_assert_equal_vi32(sizeof(r) / sizeof(r[0]), r, test_vec[i].a);
4399   }
4400   #endif
4401 
4402   return 0;
4403 }
4404 
4405 static int
4406 test_simde_mm_loadu_si16 (SIMDE_MUNIT_TEST_ARGS) {
4407   static const struct {
4408     const int16_t a;
4409     const int16_t r[8];
4410   } test_vec[] = {
4411     { -INT16_C( 11138),
4412       { -INT16_C( 11138),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
4413     {  INT16_C( 23724),
4414       {  INT16_C( 23724),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
4415     {  INT16_C( 14484),
4416       {  INT16_C( 14484),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
4417     {  INT16_C( 13428),
4418       {  INT16_C( 13428),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
4419     { -INT16_C(  4679),
4420       { -INT16_C(  4679),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
4421     { -INT16_C( 27444),
4422       { -INT16_C( 27444),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
4423     {  INT16_C( 23920),
4424       {  INT16_C( 23920),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } },
4425     {  INT16_C( 10692),
4426       {  INT16_C( 10692),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0),  INT16_C(     0) } }
4427   };
4428 
4429   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
4430     int16_t a = test_vec[i].a;
4431     simde__m128i r = HEDLEY_CONCAT(simde,_mm_loadu_si16)(&a);
4432     simde_test_x86_assert_equal_i16x8(r, simde_x_mm_loadu_epi16(test_vec[i].r));
4433   }
4434 
4435   return 0;
4436 }
4437 
4438 static int
4439 test_simde_mm_loadu_si32 (SIMDE_MUNIT_TEST_ARGS) {
4440   static const struct {
4441     const int32_t a;
4442     const int32_t r[4];
4443   } test_vec[] = {
4444     {  INT32_C(   418822831),
4445       {  INT32_C(   418822831),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
4446     {  INT32_C(  1942173819),
4447       {  INT32_C(  1942173819),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
4448     {  INT32_C(  1655488478),
4449       {  INT32_C(  1655488478),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
4450     { -INT32_C(  1203443910),
4451       { -INT32_C(  1203443910),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
4452     {  INT32_C(  1326772667),
4453       {  INT32_C(  1326772667),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
4454     { -INT32_C(   494630871),
4455       { -INT32_C(   494630871),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
4456     { -INT32_C(   445230987),
4457       { -INT32_C(   445230987),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } },
4458     {  INT32_C(  1544436653),
4459       {  INT32_C(  1544436653),  INT32_C(           0),  INT32_C(           0),  INT32_C(           0) } }
4460   };
4461 
4462   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
4463     int32_t a = test_vec[i].a;
4464     simde__m128i r = HEDLEY_CONCAT(simde,_mm_loadu_si32)(&a);
4465     simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
4466   }
4467 
4468   return 0;
4469 }
4470 
4471 static int
4472 test_simde_mm_loadu_si64 (SIMDE_MUNIT_TEST_ARGS) {
4473   static const struct {
4474     const int64_t a;
4475     const int64_t r[2];
4476   } test_vec[] = {
4477     {  INT64_C( 1937454096935355637),
4478       {  INT64_C( 1937454096935355637),  INT64_C(                   0) } },
4479     {  INT64_C( 3668957564122271735),
4480       {  INT64_C( 3668957564122271735),  INT64_C(                   0) } },
4481     { -INT64_C(  235024424980250958),
4482       { -INT64_C(  235024424980250958),  INT64_C(                   0) } },
4483     {  INT64_C( 7233045361154208854),
4484       {  INT64_C( 7233045361154208854),  INT64_C(                   0) } },
4485     {  INT64_C(  309823741680211445),
4486       {  INT64_C(  309823741680211445),  INT64_C(                   0) } },
4487     {  INT64_C( 4463101911464528198),
4488       {  INT64_C( 4463101911464528198),  INT64_C(                   0) } },
4489     { -INT64_C( 3557326416991718882),
4490       { -INT64_C( 3557326416991718882),  INT64_C(                   0) } },
4491     {  INT64_C( 7628307720165229322),
4492       {  INT64_C( 7628307720165229322),  INT64_C(                   0) } }
4493   };
4494 
4495   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
4496     int64_t a = test_vec[i].a;
4497     simde__m128i r = HEDLEY_CONCAT(simde,_mm_loadu_si64)(&a);
4498     simde_test_x86_assert_equal_i64x2(r, simde_x_mm_loadu_epi64(test_vec[i].r));
4499   }
4500 
4501   return 0;
4502 }
4503 
4504 static int
4505 test_simde_mm_madd_epi16(SIMDE_MUNIT_TEST_ARGS) {
4506   const struct {
4507     simde__m128i a;
4508     simde__m128i b;
4509     simde__m128i r;
4510   } test_vec[8] = {
4511     { simde_mm_set_epi16(INT16_C( 24289), INT16_C( 22642), INT16_C( 24338), INT16_C( 21466),
4512                          INT16_C(-21399), INT16_C(-25114), INT16_C(   -38), INT16_C( 24157)),
4513       simde_mm_set_epi16(INT16_C( -9939), INT16_C(-13077), INT16_C(-13691), INT16_C(-22496),
4514                          INT16_C( -7750), INT16_C(  2620), INT16_C(-25114), INT16_C(  6535)),
4515       simde_mm_set_epi32( -537497805,  -816110694,   100043570,   158820327) },
4516     { simde_mm_set_epi16(INT16_C( 13645), INT16_C( 20663), INT16_C( 21053), INT16_C(  5963),
4517                          INT16_C( -9189), INT16_C( -1395), INT16_C( 25221), INT16_C( 27070)),
4518       simde_mm_set_epi16(INT16_C(-28936), INT16_C(-24038), INT16_C(  6837), INT16_C( 18195),
4519                          INT16_C(-15316), INT16_C(-11637), INT16_C(  5200), INT16_C(  7885)),
4520       simde_mm_set_epi32( -891528914,   252436146,   156972339,   344596150) },
4521     { simde_mm_set_epi16(INT16_C(-31367), INT16_C(-13886), INT16_C( 25125), INT16_C(-12503),
4522                          INT16_C( 15451), INT16_C( -6370), INT16_C(-24113), INT16_C(  2653)),
4523       simde_mm_set_epi16(INT16_C(  5595), INT16_C(-23387), INT16_C(-11854), INT16_C(   264),
4524                          INT16_C( 15071), INT16_C(  3868), INT16_C(-30127), INT16_C(  2383)),
4525       simde_mm_set_epi32(  149253517,  -301132542,   208222861,   732774450) },
4526     { simde_mm_set_epi16(INT16_C(  6250), INT16_C(   794), INT16_C( -7973), INT16_C( 27046),
4527                          INT16_C( 13164), INT16_C( 16469), INT16_C(  1989), INT16_C(-24542)),
4528       simde_mm_set_epi16(INT16_C( 22452), INT16_C( -4402), INT16_C(-26550), INT16_C( 31252),
4529                          INT16_C(-29251), INT16_C( -1650), INT16_C(-13867), INT16_C( 26387)),
4530       simde_mm_set_epi32(  136829812,  1056924742,  -412234014,  -675171217) },
4531     { simde_mm_set_epi16(INT16_C(-17693), INT16_C(-18978), INT16_C( 22797), INT16_C( 31393),
4532                          INT16_C( 32262), INT16_C(-21009), INT16_C(  9435), INT16_C( 20059)),
4533       simde_mm_set_epi16(INT16_C( 20064), INT16_C(  4406), INT16_C(  1105), INT16_C(-32185),
4534                          INT16_C( 26331), INT16_C( -8672), INT16_C( 15113), INT16_C(-24381)),
4535       simde_mm_set_epi32( -438609420,  -985193020,  1031680770,  -346467324) },
4536     { simde_mm_set_epi16(INT16_C( -9214), INT16_C(-31455), INT16_C(-14871), INT16_C( -8603),
4537                          INT16_C( 17039), INT16_C(-27694), INT16_C( 18091), INT16_C( 27811)),
4538       simde_mm_set_epi16(INT16_C(  9903), INT16_C(  7626), INT16_C( -7009), INT16_C(-11696),
4539                          INT16_C(-31989), INT16_C( 28434), INT16_C(-24743), INT16_C(-27058)),
4540       simde_mm_set_epi32( -331122072,   204851527, -1332511767, -1200135651) },
4541     { simde_mm_set_epi16(INT16_C( 20741), INT16_C(  5382), INT16_C(-29692), INT16_C( 12589),
4542                          INT16_C( 21204), INT16_C(  3076), INT16_C(-24365), INT16_C( -1783)),
4543       simde_mm_set_epi16(INT16_C(-15203), INT16_C(-26894), INT16_C( -6878), INT16_C(-23472),
4544                          INT16_C(-18994), INT16_C( 11044), INT16_C( 15739), INT16_C(  -241)),
4545       simde_mm_set_epi32( -460068931,   -91267432,  -368777432,  -383051032) },
4546     { simde_mm_set_epi16(INT16_C( 24682), INT16_C( 17647), INT16_C(-19806), INT16_C(-13656),
4547                          INT16_C( 26394), INT16_C(  4814), INT16_C( -4589), INT16_C( 17983)),
4548       simde_mm_set_epi16(INT16_C(-32304), INT16_C(-30224), INT16_C(-20430), INT16_C(-28018),
4549                          INT16_C( 29012), INT16_C(  7494), INT16_C( -7871), INT16_C( 16228)),
4550       simde_mm_set_epi32(-1330690256,   787250388,   801818844,   327948143) }
4551   };
4552 
4553   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4554     simde__m128i r = simde_mm_madd_epi16(test_vec[i].a, test_vec[i].b);
4555     simde_assert_m128i_i32(r, ==, test_vec[i].r);
4556   }
4557 
4558   return 0;
4559 }
4560 
4561 static int
4562 test_simde_mm_maskmoveu_si128(SIMDE_MUNIT_TEST_ARGS) {
4563   const struct {
4564     simde__m128i a;
4565     simde__m128i mask;
4566     int8_t i[16];
4567     int8_t r[16];
4568   } test_vec[8] = {
4569     { simde_mm_set_epi8(INT8_C(-127), INT8_C( 121), INT8_C(  33), INT8_C(  92), INT8_C(  95), INT8_C(  30), INT8_C( 117), INT8_C( 103),
4570                         INT8_C( -74), INT8_C( -18), INT8_C(  28), INT8_C(  15), INT8_C(-111), INT8_C( -65), INT8_C( 117), INT8_C(  -8)),
4571       simde_mm_set_epi8(INT8_C(  42), INT8_C(  73), INT8_C(   1), INT8_C( -97), INT8_C( -45), INT8_C(  24), INT8_C(  88), INT8_C( -76),
4572                         INT8_C(-109), INT8_C(  78), INT8_C( -80), INT8_C( -97), INT8_C(  85), INT8_C( 102), INT8_C(  36), INT8_C( -19)),
4573       {    0, -116,  -92,  122,  -68,   23,  -43,   86,
4574          -50,  -28,  -43,   -3,   19, -114,  122,   62 },
4575       {   -8, -116,  -92,  122,   15,   28,  -43,  -74,
4576          103,  -28,  -43,   95,   92, -114,  122,   62 } },
4577     { simde_mm_set_epi8(INT8_C(  70), INT8_C( 102), INT8_C( -59), INT8_C(   6), INT8_C( -83), INT8_C(  40), INT8_C( -32), INT8_C(  43),
4578                         INT8_C(  90), INT8_C(-118), INT8_C(  82), INT8_C(  24), INT8_C(-106), INT8_C( -61), INT8_C( -19), INT8_C(-101)),
4579       simde_mm_set_epi8(INT8_C(  17), INT8_C( -58), INT8_C( -68), INT8_C(  86), INT8_C(  20), INT8_C(  40), INT8_C(  60), INT8_C(  30),
4580                         INT8_C(  31), INT8_C(  30), INT8_C(  18), INT8_C(  67), INT8_C(  -9), INT8_C( 103), INT8_C(  21), INT8_C( -50)),
4581       { -125,  126,  -10,  -60,   19,   92,   -8, -124,
4582           81, -122,  122,   13,  -58,   49, -122,   24 },
4583       { -101,  126,  -10, -106,   19,   92,   -8, -124,
4584           81, -122,  122,   13,  -58,  -59,  102,   24 } },
4585     { simde_mm_set_epi8(INT8_C( -26), INT8_C(-127), INT8_C(  58), INT8_C( -79), INT8_C( -88), INT8_C(-105), INT8_C( -66), INT8_C(  41),
4586                         INT8_C(  75), INT8_C( -34), INT8_C(  97), INT8_C( -55), INT8_C( -65), INT8_C( -30), INT8_C(  23), INT8_C(  28)),
4587       simde_mm_set_epi8(INT8_C(-107), INT8_C(  69), INT8_C( -28), INT8_C( -40), INT8_C( 105), INT8_C(   0), INT8_C( 114), INT8_C( 113),
4588                         INT8_C( -65), INT8_C( -82), INT8_C(  87), INT8_C( -14), INT8_C( -36), INT8_C(  68), INT8_C(-120), INT8_C(  38)),
4589       { -114,   55,  -16,   51,  110,  -44,   59,   -6,
4590           43,  -95,  -82,  119,  -56,    9,  -47,  -20 },
4591       { -114,   23,  -16,  -65,  -55,  -44,  -34,   75,
4592           43,  -95,  -82,  119,  -79,   58,  -47,  -26 } },
4593     { simde_mm_set_epi8(INT8_C(  87), INT8_C(  99), INT8_C(  22), INT8_C(  78), INT8_C(  93), INT8_C( -44), INT8_C( -98), INT8_C(  62),
4594                         INT8_C( -97), INT8_C( -50), INT8_C( -31), INT8_C(-109), INT8_C(  10), INT8_C( -86), INT8_C( -15), INT8_C(   7)),
4595       simde_mm_set_epi8(INT8_C(  96), INT8_C(  76), INT8_C(  14), INT8_C(-114), INT8_C(  84), INT8_C( -85), INT8_C(  61), INT8_C( -38),
4596                         INT8_C( -45), INT8_C( -83), INT8_C( -13), INT8_C( -50), INT8_C( -35), INT8_C(-111), INT8_C(-107), INT8_C( -50)),
4597       {   -8,   64,  -68,   23, -101,   35,  126,  119,
4598          -47,    4,   79,   23,  113,  117,  -76,   93 },
4599       {    7,  -15,  -86,   10, -109,  -31,  -50,  -97,
4600           62,    4,  -44,   23,   78,  117,  -76,   93 } },
4601     { simde_mm_set_epi8(INT8_C( -38), INT8_C(-119), INT8_C( -46), INT8_C( -13), INT8_C(  -3), INT8_C(-122), INT8_C(  75), INT8_C( 123),
4602                         INT8_C(  26), INT8_C( -71), INT8_C( -42), INT8_C(-124), INT8_C(  49), INT8_C(   1), INT8_C(   5), INT8_C( -90)),
4603       simde_mm_set_epi8(INT8_C( 116), INT8_C(  -8), INT8_C(  33), INT8_C(  63), INT8_C(-110), INT8_C( -94), INT8_C( -34), INT8_C(  66),
4604                         INT8_C(  51), INT8_C(-117), INT8_C(  28), INT8_C( -95), INT8_C( -52), INT8_C( 122), INT8_C( 118), INT8_C( -99)),
4605       {  -45,   85,   12,   62,  -89, -105,   90,  -19,
4606           48,   18,  -59,  -98,  -30, -113,   13,   91 },
4607       {  -90,   85,   12,   49, -124, -105,  -71,  -19,
4608           48,   75, -122,   -3,  -30, -113, -119,   91 } },
4609     { simde_mm_set_epi8(INT8_C(  52), INT8_C(  36), INT8_C( 112), INT8_C(  70), INT8_C( 110), INT8_C(  75), INT8_C(  -6), INT8_C(-101),
4610                         INT8_C(   3), INT8_C( 113), INT8_C( -32), INT8_C( 119), INT8_C( -19), INT8_C( 117), INT8_C(  31), INT8_C( 119)),
4611       simde_mm_set_epi8(INT8_C(  79), INT8_C(  39), INT8_C(   6), INT8_C(  30), INT8_C( 120), INT8_C( -75), INT8_C(  26), INT8_C(  57),
4612                         INT8_C( 123), INT8_C(  70), INT8_C(  40), INT8_C(  84), INT8_C( 111), INT8_C( -59), INT8_C( -79), INT8_C( -87)),
4613       {  -74,   -2, -124,   73,   44,   83,   18,  -48,
4614          -78,   27,   30,  -26,   -3,   56,   89,  125 },
4615       {  119,   31,  117,   73,   44,   83,   18,  -48,
4616          -78,   27,   75,  -26,   -3,   56,   89,  125 } },
4617     { simde_mm_set_epi8(INT8_C( -83), INT8_C( -42), INT8_C(-124), INT8_C( -81), INT8_C( -65), INT8_C(  46), INT8_C( -62), INT8_C( 102),
4618                         INT8_C(  50), INT8_C(   6), INT8_C(  -6), INT8_C( -21), INT8_C( -51), INT8_C(-114), INT8_C(-126), INT8_C(  74)),
4619       simde_mm_set_epi8(INT8_C( 127), INT8_C(-119), INT8_C( -85), INT8_C(  95), INT8_C(  70), INT8_C( -40), INT8_C(  17), INT8_C( 124),
4620                         INT8_C( -61), INT8_C(-126), INT8_C( 117), INT8_C( -42), INT8_C(  62), INT8_C(-111), INT8_C(-103), INT8_C(  57)),
4621       {  108,   40, -118,  -12,   90,  -12,  -24,   50,
4622          104,   87,  -20,  -86,  -53,   25,  -94, -101 },
4623       {  108, -126, -114,  -12,  -21,  -12,    6,   50,
4624          104,   87,   46,  -86,  -53, -124,  -42, -101 } },
4625     { simde_mm_set_epi8(INT8_C( -23), INT8_C( -92), INT8_C(  93), INT8_C( -78), INT8_C( -39), INT8_C( -72), INT8_C( -43), INT8_C(   1),
4626                         INT8_C(-121), INT8_C( 103), INT8_C(  61), INT8_C(  82), INT8_C(  45), INT8_C(-120), INT8_C( -86), INT8_C(  51)),
4627       simde_mm_set_epi8(INT8_C( -82), INT8_C(  92), INT8_C( -46), INT8_C( -97), INT8_C(  59), INT8_C( -16), INT8_C(  95), INT8_C( -85),
4628                         INT8_C(  38), INT8_C(  36), INT8_C(-125), INT8_C(  74), INT8_C(  14), INT8_C(  75), INT8_C( -74), INT8_C( -25)),
4629       {  -53,   48,   63,  -44, -103,   12,   49,   -1,
4630          -58,   70,  -18, -117,  101,  -90,  121,  -31 },
4631       {   51,  -86,   63,  -44, -103,   61,   49,   -1,
4632            1,   70,  -72, -117,  -78,   93,  121,  -23 } }
4633   };
4634 
4635   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4636     int8_t r[16];
4637     simde_memcpy(r, test_vec[i].i, 16);
4638 
4639     #if defined SIMDE_X86_SSE2_NATIVE && defined SIMDE_NATIVE_ALIASES_TESTING
4640       simde_mm_maskmoveu_si128(test_vec[i].a, test_vec[i].mask, HEDLEY_REINTERPRET_CAST(char *, r));
4641     #else
4642       simde_mm_maskmoveu_si128(test_vec[i].a, test_vec[i].mask, r);
4643     #endif
4644 
4645     simde_assert_equal_vi8(sizeof(r) / sizeof(r[0]), r, test_vec[i].r);
4646   }
4647 
4648   return 0;
4649 }
4650 
4651 static int
4652 test_simde_mm_min_epu8(SIMDE_MUNIT_TEST_ARGS) {
4653   const struct {
4654     simde__m128i a;
4655     simde__m128i b;
4656     simde__m128i r;
4657   } test_vec[8] = {
4658     { simde_x_mm_set_epu8(164, 110, 116,  95,  43, 222, 122,  21,
4659                            12,  65, 208, 248, 108, 166, 116,  17),
4660       simde_x_mm_set_epu8( 56,  62, 214,  11, 248, 124,  69,   2,
4661                           198, 169, 227, 124, 192, 250, 140,  32),
4662       simde_x_mm_set_epu8( 56,  62, 116,  11,  43, 124,  69,   2,
4663                            12,  65, 208, 124, 108, 166, 116,  17) },
4664     { simde_x_mm_set_epu8(136, 183, 233,  96,  11,  36, 213, 146,
4665                           238, 174,  59, 229,  56, 186, 203,  28),
4666       simde_x_mm_set_epu8( 82,  12,   9, 200, 196, 116, 127, 247,
4667                             1,  69, 178, 128,  65,  11, 179, 176),
4668       simde_x_mm_set_epu8( 82,  12,   9,  96,  11,  36, 127, 146,
4669                             1,  69,  59, 128,  56,  11, 179,  28) },
4670     { simde_x_mm_set_epu8(164,  64, 195, 253,  35,  95, 119, 110,
4671                           106, 102,  21, 173,  49,  40, 101, 151),
4672       simde_x_mm_set_epu8(109,  25,  32, 203,  64,  71,  33,  98,
4673                            48,  43, 195,  67, 254, 158, 167, 217),
4674       simde_x_mm_set_epu8(109,  25,  32, 203,  35,  71,  33,  98,
4675                            48,  43,  21,  67,  49,  40, 101, 151) },
4676     { simde_x_mm_set_epu8(233, 255, 136, 159, 118, 246,  37,   8,
4677                           195,  35,  70,   7,  91,  37,  20, 112),
4678       simde_x_mm_set_epu8(  4,  91, 243, 163, 160,  26, 137, 208,
4679                           146, 195, 124, 148,  53,  99,  21, 240),
4680       simde_x_mm_set_epu8(  4,  91, 136, 159, 118,  26,  37,   8,
4681                           146,  35,  70,   7,  53,  37,  20, 112) },
4682     { simde_x_mm_set_epu8(196,   4, 110, 234,  88, 121, 133, 146,
4683                           127, 167, 173, 105, 205,   0, 197, 107),
4684       simde_x_mm_set_epu8(240, 105, 248,  55, 202, 217, 219, 230,
4685                           183, 240,  91, 164, 168,   6,  75, 186),
4686       simde_x_mm_set_epu8(196,   4, 110,  55,  88, 121, 133, 146,
4687                           127, 167,  91, 105, 168,   0,  75, 107) },
4688     { simde_x_mm_set_epu8(191, 108, 145, 178, 194, 118, 187, 175,
4689                            80, 196,  99, 239,   6, 206, 186, 130),
4690       simde_x_mm_set_epu8(109, 182, 208,  91, 232, 171,  41, 238,
4691                           121, 144, 203,  42, 182,  89,  69, 166),
4692       simde_x_mm_set_epu8(109, 108, 145,  91, 194, 118,  41, 175,
4693                            80, 144,  99,  42,   6,  89,  69, 130) },
4694     { simde_x_mm_set_epu8(  5, 152, 184, 251, 233,  22, 184, 152,
4695                            12, 126, 120,  80, 191,  98,  37,  36),
4696       simde_x_mm_set_epu8(194, 116, 229, 250, 247, 241, 153, 192,
4697                            20, 172, 224, 148, 240, 246, 120,   0),
4698       simde_x_mm_set_epu8(  5, 116, 184, 250, 233,  22, 153, 152,
4699                            12, 126, 120,  80, 191,  98,  37,   0) },
4700     { simde_x_mm_set_epu8(188, 102, 137, 134, 213,   1, 140, 166,
4701                           143, 171, 248,  89, 128,  81, 226, 136),
4702       simde_x_mm_set_epu8(246, 210,  83, 107,  44, 255, 100, 235,
4703                            99, 233, 199, 197,   1, 159,  61,  31),
4704       simde_x_mm_set_epu8(188, 102,  83, 107,  44,   1, 100, 166,
4705                            99, 171, 199,  89,   1,  81,  61,  31) }
4706   };
4707 
4708   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4709     simde__m128i r = simde_mm_min_epu8(test_vec[i].a, test_vec[i].b);
4710     simde_assert_m128i_u8(r, ==, test_vec[i].r);
4711   }
4712 
4713   return 0;
4714 }
4715 
4716 static int
4717 test_simde_mm_min_epi16(SIMDE_MUNIT_TEST_ARGS) {
4718   const struct {
4719     simde__m128i a;
4720     simde__m128i b;
4721     simde__m128i r;
4722   } test_vec[8] = {
4723     { simde_mm_set_epi16(INT16_C(-23442), INT16_C( 29791), INT16_C( 11230), INT16_C( 31253),
4724                          INT16_C(  3137), INT16_C(-12040), INT16_C( 27814), INT16_C( 29713)),
4725       simde_mm_set_epi16(INT16_C( 14398), INT16_C(-10741), INT16_C( -1924), INT16_C( 17666),
4726                          INT16_C(-14679), INT16_C( -7300), INT16_C(-16134), INT16_C(-29664)),
4727       simde_mm_set_epi16(INT16_C(-23442), INT16_C(-10741), INT16_C( -1924), INT16_C( 17666),
4728                          INT16_C(-14679), INT16_C(-12040), INT16_C(-16134), INT16_C(-29664)) },
4729     { simde_mm_set_epi16(INT16_C(-30537), INT16_C( -5792), INT16_C(  2852), INT16_C(-10862),
4730                          INT16_C( -4434), INT16_C( 15333), INT16_C( 14522), INT16_C(-13540)),
4731       simde_mm_set_epi16(INT16_C( 21004), INT16_C(  2504), INT16_C(-15244), INT16_C( 32759),
4732                          INT16_C(   325), INT16_C(-19840), INT16_C( 16651), INT16_C(-19536)),
4733       simde_mm_set_epi16(INT16_C(-30537), INT16_C( -5792), INT16_C(-15244), INT16_C(-10862),
4734                          INT16_C( -4434), INT16_C(-19840), INT16_C( 14522), INT16_C(-19536)) },
4735     { simde_mm_set_epi16(INT16_C(-23488), INT16_C(-15363), INT16_C(  9055), INT16_C( 30574),
4736                          INT16_C( 27238), INT16_C(  5549), INT16_C( 12584), INT16_C( 26007)),
4737       simde_mm_set_epi16(INT16_C( 27929), INT16_C(  8395), INT16_C( 16455), INT16_C(  8546),
4738                          INT16_C( 12331), INT16_C(-15549), INT16_C(  -354), INT16_C(-22567)),
4739       simde_mm_set_epi16(INT16_C(-23488), INT16_C(-15363), INT16_C(  9055), INT16_C(  8546),
4740                          INT16_C( 12331), INT16_C(-15549), INT16_C(  -354), INT16_C(-22567)) },
4741     { simde_mm_set_epi16(INT16_C( -5633), INT16_C(-30561), INT16_C( 30454), INT16_C(  9480),
4742                          INT16_C(-15581), INT16_C( 17927), INT16_C( 23333), INT16_C(  5232)),
4743       simde_mm_set_epi16(INT16_C(  1115), INT16_C( -3165), INT16_C(-24550), INT16_C(-30256),
4744                          INT16_C(-27965), INT16_C( 31892), INT16_C( 13667), INT16_C(  5616)),
4745       simde_mm_set_epi16(INT16_C( -5633), INT16_C(-30561), INT16_C(-24550), INT16_C(-30256),
4746                          INT16_C(-27965), INT16_C( 17927), INT16_C( 13667), INT16_C(  5232)) },
4747     { simde_mm_set_epi16(INT16_C(-15356), INT16_C( 28394), INT16_C( 22649), INT16_C(-31342),
4748                          INT16_C( 32679), INT16_C(-21143), INT16_C(-13056), INT16_C(-14997)),
4749       simde_mm_set_epi16(INT16_C( -3991), INT16_C( -1993), INT16_C(-13607), INT16_C( -9242),
4750                          INT16_C(-18448), INT16_C( 23460), INT16_C(-22522), INT16_C( 19386)),
4751       simde_mm_set_epi16(INT16_C(-15356), INT16_C( -1993), INT16_C(-13607), INT16_C(-31342),
4752                          INT16_C(-18448), INT16_C(-21143), INT16_C(-22522), INT16_C(-14997)) },
4753     { simde_mm_set_epi16(INT16_C(-16532), INT16_C(-28238), INT16_C(-15754), INT16_C(-17489),
4754                          INT16_C( 20676), INT16_C( 25583), INT16_C(  1742), INT16_C(-17790)),
4755       simde_mm_set_epi16(INT16_C( 28086), INT16_C(-12197), INT16_C( -5973), INT16_C( 10734),
4756                          INT16_C( 31120), INT16_C(-13526), INT16_C(-18855), INT16_C( 17830)),
4757       simde_mm_set_epi16(INT16_C(-16532), INT16_C(-28238), INT16_C(-15754), INT16_C(-17489),
4758                          INT16_C( 20676), INT16_C(-13526), INT16_C(-18855), INT16_C(-17790)) },
4759     { simde_mm_set_epi16(INT16_C(  1432), INT16_C(-18181), INT16_C( -5866), INT16_C(-18280),
4760                          INT16_C(  3198), INT16_C( 30800), INT16_C(-16542), INT16_C(  9508)),
4761       simde_mm_set_epi16(INT16_C(-15756), INT16_C( -6662), INT16_C( -2063), INT16_C(-26176),
4762                          INT16_C(  5292), INT16_C( -8044), INT16_C( -3850), INT16_C( 30720)),
4763       simde_mm_set_epi16(INT16_C(-15756), INT16_C(-18181), INT16_C( -5866), INT16_C(-26176),
4764                          INT16_C(  3198), INT16_C( -8044), INT16_C(-16542), INT16_C(  9508)) },
4765     { simde_mm_set_epi16(INT16_C(-17306), INT16_C(-30330), INT16_C(-11007), INT16_C(-29530),
4766                          INT16_C(-28757), INT16_C( -1959), INT16_C(-32687), INT16_C( -7544)),
4767       simde_mm_set_epi16(INT16_C( -2350), INT16_C( 21355), INT16_C( 11519), INT16_C( 25835),
4768                          INT16_C( 25577), INT16_C(-14395), INT16_C(   415), INT16_C( 15647)),
4769       simde_mm_set_epi16(INT16_C(-17306), INT16_C(-30330), INT16_C(-11007), INT16_C(-29530),
4770                          INT16_C(-28757), INT16_C(-14395), INT16_C(-32687), INT16_C( -7544)) }
4771   };
4772 
4773   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4774     simde__m128i r = simde_mm_min_epi16(test_vec[i].a, test_vec[i].b);
4775     simde_assert_m128i_u16(r, ==, test_vec[i].r);
4776   }
4777 
4778   return 0;
4779 }
4780 
4781 static int
4782 test_simde_mm_min_pd(SIMDE_MUNIT_TEST_ARGS) {
4783   const struct {
4784     simde__m128d a;
4785     simde__m128d b;
4786     simde__m128d r;
4787   } test_vec[10] = {
4788     { simde_mm_set_pd(SIMDE_FLOAT64_C( -927.67), SIMDE_FLOAT64_C( -514.32)),
4789       simde_mm_set_pd(SIMDE_FLOAT64_C(  342.71), SIMDE_FLOAT64_C(  927.58)),
4790       simde_mm_set_pd(SIMDE_FLOAT64_C( -927.67), SIMDE_FLOAT64_C( -514.32)) },
4791     { simde_mm_set_pd(SIMDE_FLOAT64_C( -704.64), SIMDE_FLOAT64_C(  925.40)),
4792       simde_mm_set_pd(SIMDE_FLOAT64_C( -589.60), SIMDE_FLOAT64_C( -498.63)),
4793       simde_mm_set_pd(SIMDE_FLOAT64_C( -704.64), SIMDE_FLOAT64_C( -498.63)) },
4794     { simde_mm_set_pd(SIMDE_FLOAT64_C( -244.39), SIMDE_FLOAT64_C(  572.76)),
4795       simde_mm_set_pd(SIMDE_FLOAT64_C(  -10.04), SIMDE_FLOAT64_C(  293.99)),
4796       simde_mm_set_pd(SIMDE_FLOAT64_C( -244.39), SIMDE_FLOAT64_C(  293.99)) },
4797     { simde_mm_set_pd(SIMDE_FLOAT64_C(  321.70), SIMDE_FLOAT64_C( -283.39)),
4798       simde_mm_set_pd(SIMDE_FLOAT64_C(   60.35), SIMDE_FLOAT64_C( -248.75)),
4799       simde_mm_set_pd(SIMDE_FLOAT64_C(   60.35), SIMDE_FLOAT64_C( -283.39)) },
4800     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -67.84), SIMDE_FLOAT64_C(  763.91)),
4801       simde_mm_set_pd(SIMDE_FLOAT64_C(  150.47), SIMDE_FLOAT64_C( -773.85)),
4802       simde_mm_set_pd(SIMDE_FLOAT64_C(  -67.84), SIMDE_FLOAT64_C( -773.85)) },
4803     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -42.86), SIMDE_FLOAT64_C(  169.28)),
4804       simde_mm_set_pd(SIMDE_FLOAT64_C( -820.89), SIMDE_FLOAT64_C(  325.20)),
4805       simde_mm_set_pd(SIMDE_FLOAT64_C( -820.89), SIMDE_FLOAT64_C(  169.28)) },
4806     { simde_mm_set_pd(SIMDE_FLOAT64_C( -976.90), SIMDE_FLOAT64_C(  883.11)),
4807       simde_mm_set_pd(SIMDE_FLOAT64_C( -450.39), SIMDE_FLOAT64_C( -249.21)),
4808       simde_mm_set_pd(SIMDE_FLOAT64_C( -976.90), SIMDE_FLOAT64_C( -249.21)) },
4809     { simde_mm_set_pd(SIMDE_FLOAT64_C(  495.76), SIMDE_FLOAT64_C(  415.93)),
4810       simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C( -567.72)),
4811       simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C( -567.72)) },
4812     { simde_mm_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C( -567.72)),
4813       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C( -567.72)),
4814       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C( -567.72)) },
4815     { simde_mm_set_pd(SIMDE_FLOAT64_C(  495.76), SIMDE_FLOAT64_C(    0.00)),
4816       simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C(    0.00)),
4817       simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C(    0.00)) },
4818   };
4819 
4820   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4821     simde__m128d r = simde_mm_min_pd(test_vec[i].a, test_vec[i].b);
4822     simde_assert_m128d_close(r, test_vec[i].r, 1);
4823   }
4824 
4825   return 0;
4826 }
4827 
4828 static int
4829 test_simde_mm_min_sd(SIMDE_MUNIT_TEST_ARGS) {
4830   const struct {
4831     simde__m128d a;
4832     simde__m128d b;
4833     simde__m128d r;
4834   } test_vec[8] = {
4835     { simde_mm_set_pd(SIMDE_FLOAT64_C( -927.67), SIMDE_FLOAT64_C( -514.32)),
4836       simde_mm_set_pd(SIMDE_FLOAT64_C(  342.71), SIMDE_FLOAT64_C(  927.58)),
4837       simde_mm_set_pd(SIMDE_FLOAT64_C( -927.67), SIMDE_FLOAT64_C( -514.32)) },
4838     { simde_mm_set_pd(SIMDE_FLOAT64_C( -704.64), SIMDE_FLOAT64_C(  925.40)),
4839       simde_mm_set_pd(SIMDE_FLOAT64_C( -589.60), SIMDE_FLOAT64_C( -498.63)),
4840       simde_mm_set_pd(SIMDE_FLOAT64_C( -704.64), SIMDE_FLOAT64_C( -498.63)) },
4841     { simde_mm_set_pd(SIMDE_FLOAT64_C( -244.39), SIMDE_FLOAT64_C(  572.76)),
4842       simde_mm_set_pd(SIMDE_FLOAT64_C(  -10.04), SIMDE_FLOAT64_C(  293.99)),
4843       simde_mm_set_pd(SIMDE_FLOAT64_C( -244.39), SIMDE_FLOAT64_C(  293.99)) },
4844     { simde_mm_set_pd(SIMDE_FLOAT64_C(  321.70), SIMDE_FLOAT64_C( -283.39)),
4845       simde_mm_set_pd(SIMDE_FLOAT64_C(   60.35), SIMDE_FLOAT64_C( -248.75)),
4846       simde_mm_set_pd(SIMDE_FLOAT64_C(  321.70), SIMDE_FLOAT64_C( -283.39)) },
4847     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -67.84), SIMDE_FLOAT64_C(  763.91)),
4848       simde_mm_set_pd(SIMDE_FLOAT64_C(  150.47), SIMDE_FLOAT64_C( -773.85)),
4849       simde_mm_set_pd(SIMDE_FLOAT64_C(  -67.84), SIMDE_FLOAT64_C( -773.85)) },
4850     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -42.86), SIMDE_FLOAT64_C(  169.28)),
4851       simde_mm_set_pd(SIMDE_FLOAT64_C( -820.89), SIMDE_FLOAT64_C(  325.20)),
4852       simde_mm_set_pd(SIMDE_FLOAT64_C(  -42.86), SIMDE_FLOAT64_C(  169.28)) },
4853     { simde_mm_set_pd(SIMDE_FLOAT64_C( -976.90), SIMDE_FLOAT64_C(  883.11)),
4854       simde_mm_set_pd(SIMDE_FLOAT64_C( -450.39), SIMDE_FLOAT64_C( -249.21)),
4855       simde_mm_set_pd(SIMDE_FLOAT64_C( -976.90), SIMDE_FLOAT64_C( -249.21)) },
4856     { simde_mm_set_pd(SIMDE_FLOAT64_C(  495.76), SIMDE_FLOAT64_C(  415.93)),
4857       simde_mm_set_pd(SIMDE_FLOAT64_C( -979.87), SIMDE_FLOAT64_C( -567.72)),
4858       simde_mm_set_pd(SIMDE_FLOAT64_C(  495.76), SIMDE_FLOAT64_C( -567.72)) }
4859   };
4860 
4861   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4862     simde__m128d r = simde_mm_min_sd(test_vec[i].a, test_vec[i].b);
4863     simde_assert_m128d_close(r, test_vec[i].r, 1);
4864   }
4865 
4866   return 0;
4867 }
4868 
4869 static int
4870 test_simde_mm_max_epu8(SIMDE_MUNIT_TEST_ARGS) {
4871   const struct {
4872     simde__m128i a;
4873     simde__m128i b;
4874     simde__m128i r;
4875   } test_vec[8] = {
4876     { simde_x_mm_set_epu8(168, 216, 116,  83,  89,  36,  70,  43,
4877                           213,  80, 178, 134,  66, 113, 243, 129),
4878       simde_x_mm_set_epu8(121,  26,  77, 212,  58, 167,  83, 180,
4879                           236,  50,  65, 112, 248, 228,  87, 231),
4880       simde_x_mm_set_epu8(168, 216, 116, 212,  89, 167,  83, 180,
4881                           236,  80, 178, 134, 248, 228, 243, 231) },
4882     { simde_x_mm_set_epu8(157, 143, 244, 146,  49, 140, 145, 221,
4883                            80,  79, 154,  71,  49, 213, 105,  22),
4884       simde_x_mm_set_epu8( 81,  58, 115, 104,  26, 180,  63,  33,
4885                            16, 205,  98, 228, 235, 156, 147, 109),
4886       simde_x_mm_set_epu8(157, 143, 244, 146,  49, 180, 145, 221,
4887                            80, 205, 154, 228, 235, 213, 147, 109) },
4888     { simde_x_mm_set_epu8(120, 153, 102, 244, 149, 171, 101, 141,
4889                           231, 205, 156,  11, 214, 255,  28, 215),
4890       simde_x_mm_set_epu8( 89, 227, 119,  48, 219,  88,   0,  68,
4891                           146, 196, 199,  34, 143, 246, 184,  31),
4892       simde_x_mm_set_epu8(120, 227, 119, 244, 219, 171, 101, 141,
4893                           231, 205, 199,  34, 214, 255, 184, 215) },
4894     { simde_x_mm_set_epu8(201, 221,  39,  38, 119, 106,  89, 236,
4895                             8,  81, 136,  17,  62,  33, 200,  24),
4896       simde_x_mm_set_epu8( 76, 153, 167,  42, 171, 206,  46, 181,
4897                            37, 117,  72, 251, 153,  91, 107,  96),
4898       simde_x_mm_set_epu8(201, 221, 167,  42, 171, 206,  89, 236,
4899                            37, 117, 136, 251, 153,  91, 200,  96) },
4900     { simde_x_mm_set_epu8( 34,  94, 125,  66, 238, 110, 110,  27,
4901                            90, 179, 184, 250, 202,  62, 132,  68),
4902       simde_x_mm_set_epu8(135, 208,  31,  76,  51,   5,  50, 220,
4903                            43, 120,  10, 131, 247, 241, 134, 232),
4904       simde_x_mm_set_epu8(135, 208, 125,  76, 238, 110, 110, 220,
4905                            90, 179, 184, 250, 247, 241, 134, 232) },
4906     { simde_x_mm_set_epu8( 12, 112,  35,  12, 111,   1,  16, 229,
4907                           119, 199,  69,  96, 220, 123, 153, 230),
4908       simde_x_mm_set_epu8(147, 155,  56, 136, 236,  16,  93,  16,
4909                            43, 253, 136, 239, 147,  44, 146,   0),
4910       simde_x_mm_set_epu8(147, 155,  56, 136, 236,  16,  93, 229,
4911                           119, 253, 136, 239, 220, 123, 153, 230) },
4912     { simde_x_mm_set_epu8(138, 177,  86, 183, 144, 112,  42,  67,
4913                           100, 123, 214, 234,  34, 240,  19,  10),
4914       simde_x_mm_set_epu8( 81,  53, 255, 195, 169, 127, 131, 109,
4915                           181, 161, 246, 113,  87,  20, 157, 194),
4916       simde_x_mm_set_epu8(138, 177, 255, 195, 169, 127, 131, 109,
4917                           181, 161, 246, 234,  87, 240, 157, 194) },
4918     { simde_x_mm_set_epu8(  5,  26, 135,  12, 151, 226, 187,  12,
4919                           213, 244, 209, 245, 136,  13, 164, 249),
4920       simde_x_mm_set_epu8( 92,  57, 200, 208, 212, 214, 211, 217,
4921                            65, 228, 179,  64,  34, 236, 220, 208),
4922       simde_x_mm_set_epu8( 92,  57, 200, 208, 212, 226, 211, 217,
4923                           213, 244, 209, 245, 136, 236, 220, 249) }
4924   };
4925 
4926   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4927     simde__m128i r = simde_mm_max_epu8(test_vec[i].a, test_vec[i].b);
4928     simde_assert_m128i_u8(r, ==, test_vec[i].r);
4929   }
4930 
4931   return 0;
4932 }
4933 
4934 static int
4935 test_simde_mm_max_epi16(SIMDE_MUNIT_TEST_ARGS) {
4936   const struct {
4937     simde__m128i a;
4938     simde__m128i b;
4939     simde__m128i r;
4940   } test_vec[8] = {
4941     { simde_mm_set_epi16(INT16_C(-22312), INT16_C( 29779), INT16_C( 22820), INT16_C( 17963),
4942                          INT16_C(-10928), INT16_C(-19834), INT16_C( 17009), INT16_C( -3199)),
4943       simde_mm_set_epi16(INT16_C( 31002), INT16_C( 19924), INT16_C( 15015), INT16_C( 21428),
4944                          INT16_C( -5070), INT16_C( 16752), INT16_C( -1820), INT16_C( 22503)),
4945       simde_mm_set_epi16(INT16_C( 31002), INT16_C( 29779), INT16_C( 22820), INT16_C( 21428),
4946                          INT16_C( -5070), INT16_C( 16752), INT16_C( 17009), INT16_C( 22503)) },
4947     { simde_mm_set_epi16(INT16_C(-25201), INT16_C( -2926), INT16_C( 12684), INT16_C(-28195),
4948                          INT16_C( 20559), INT16_C(-26041), INT16_C( 12757), INT16_C( 26902)),
4949       simde_mm_set_epi16(INT16_C( 20794), INT16_C( 29544), INT16_C(  6836), INT16_C( 16161),
4950                          INT16_C(  4301), INT16_C( 25316), INT16_C( -5220), INT16_C(-27795)),
4951       simde_mm_set_epi16(INT16_C( 20794), INT16_C( 29544), INT16_C( 12684), INT16_C( 16161),
4952                          INT16_C( 20559), INT16_C( 25316), INT16_C( 12757), INT16_C( 26902)) },
4953     { simde_mm_set_epi16(INT16_C( 30873), INT16_C( 26356), INT16_C(-27221), INT16_C( 25997),
4954                          INT16_C( -6195), INT16_C(-25589), INT16_C(-10497), INT16_C(  7383)),
4955       simde_mm_set_epi16(INT16_C( 23011), INT16_C( 30512), INT16_C( -9384), INT16_C(    68),
4956                          INT16_C(-27964), INT16_C(-14558), INT16_C(-28682), INT16_C(-18401)),
4957       simde_mm_set_epi16(INT16_C( 30873), INT16_C( 30512), INT16_C( -9384), INT16_C( 25997),
4958                          INT16_C( -6195), INT16_C(-14558), INT16_C(-10497), INT16_C(  7383)) },
4959     { simde_mm_set_epi16(INT16_C(-13859), INT16_C( 10022), INT16_C( 30570), INT16_C( 23020),
4960                          INT16_C(  2129), INT16_C(-30703), INT16_C( 15905), INT16_C(-14312)),
4961       simde_mm_set_epi16(INT16_C( 19609), INT16_C(-22742), INT16_C(-21554), INT16_C( 11957),
4962                          INT16_C(  9589), INT16_C( 18683), INT16_C(-26277), INT16_C( 27488)),
4963       simde_mm_set_epi16(INT16_C( 19609), INT16_C( 10022), INT16_C( 30570), INT16_C( 23020),
4964                          INT16_C(  9589), INT16_C( 18683), INT16_C( 15905), INT16_C( 27488)) },
4965     { simde_mm_set_epi16(INT16_C(  8798), INT16_C( 32066), INT16_C( -4498), INT16_C( 28187),
4966                          INT16_C( 23219), INT16_C(-18182), INT16_C(-13762), INT16_C(-31676)),
4967       simde_mm_set_epi16(INT16_C(-30768), INT16_C(  8012), INT16_C( 13061), INT16_C( 13020),
4968                          INT16_C( 11128), INT16_C(  2691), INT16_C( -2063), INT16_C(-31000)),
4969       simde_mm_set_epi16(INT16_C(  8798), INT16_C( 32066), INT16_C( 13061), INT16_C( 28187),
4970                          INT16_C( 23219), INT16_C(  2691), INT16_C( -2063), INT16_C(-31000)) },
4971     { simde_mm_set_epi16(INT16_C(  3184), INT16_C(  8972), INT16_C( 28417), INT16_C(  4325),
4972                          INT16_C( 30663), INT16_C( 17760), INT16_C( -9093), INT16_C(-26138)),
4973       simde_mm_set_epi16(INT16_C(-27749), INT16_C( 14472), INT16_C( -5104), INT16_C( 23824),
4974                          INT16_C( 11261), INT16_C(-30481), INT16_C(-27860), INT16_C(-28160)),
4975       simde_mm_set_epi16(INT16_C(  3184), INT16_C( 14472), INT16_C( 28417), INT16_C( 23824),
4976                          INT16_C( 30663), INT16_C( 17760), INT16_C( -9093), INT16_C(-26138)) },
4977     { simde_mm_set_epi16(INT16_C(-30031), INT16_C( 22199), INT16_C(-28560), INT16_C( 10819),
4978                          INT16_C( 25723), INT16_C(-10518), INT16_C(  8944), INT16_C(  4874)),
4979       simde_mm_set_epi16(INT16_C( 20789), INT16_C(   -61), INT16_C(-22145), INT16_C(-31891),
4980                          INT16_C(-19039), INT16_C( -2447), INT16_C( 22292), INT16_C(-25150)),
4981       simde_mm_set_epi16(INT16_C( 20789), INT16_C( 22199), INT16_C(-22145), INT16_C( 10819),
4982                          INT16_C( 25723), INT16_C( -2447), INT16_C( 22292), INT16_C(  4874)) },
4983     { simde_mm_set_epi16(INT16_C(  1306), INT16_C(-30964), INT16_C(-26654), INT16_C(-17652),
4984                          INT16_C(-10764), INT16_C(-11787), INT16_C(-30707), INT16_C(-23303)),
4985       simde_mm_set_epi16(INT16_C( 23609), INT16_C(-14128), INT16_C(-11050), INT16_C(-11303),
4986                          INT16_C( 16868), INT16_C(-19648), INT16_C(  8940), INT16_C( -9008)),
4987       simde_mm_set_epi16(INT16_C( 23609), INT16_C(-14128), INT16_C(-11050), INT16_C(-11303),
4988                          INT16_C( 16868), INT16_C(-11787), INT16_C(  8940), INT16_C( -9008)) }
4989   };
4990 
4991   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
4992     simde__m128i r = simde_mm_max_epi16(test_vec[i].a, test_vec[i].b);
4993     simde_assert_m128i_i16(r, ==, test_vec[i].r);
4994   }
4995 
4996   return 0;
4997 }
4998 
4999 static int
5000 test_simde_mm_max_pd(SIMDE_MUNIT_TEST_ARGS) {
5001   const struct {
5002     simde__m128d a;
5003     simde__m128d b;
5004     simde__m128d r;
5005   } test_vec[8] = {
5006     { simde_mm_set_pd(SIMDE_FLOAT64_C( -303.58), SIMDE_FLOAT64_C( -480.90)),
5007       simde_mm_set_pd(SIMDE_FLOAT64_C(  319.11), SIMDE_FLOAT64_C(  666.53)),
5008       simde_mm_set_pd(SIMDE_FLOAT64_C(  319.11), SIMDE_FLOAT64_C(  666.53)) },
5009     { simde_mm_set_pd(SIMDE_FLOAT64_C( -541.77), SIMDE_FLOAT64_C(  944.47)),
5010       simde_mm_set_pd(SIMDE_FLOAT64_C(  -53.88), SIMDE_FLOAT64_C(  845.28)),
5011       simde_mm_set_pd(SIMDE_FLOAT64_C(  -53.88), SIMDE_FLOAT64_C(  944.47)) },
5012     { simde_mm_set_pd(SIMDE_FLOAT64_C( -612.90), SIMDE_FLOAT64_C( -610.67)),
5013       simde_mm_set_pd(SIMDE_FLOAT64_C(  230.96), SIMDE_FLOAT64_C( -372.57)),
5014       simde_mm_set_pd(SIMDE_FLOAT64_C(  230.96), SIMDE_FLOAT64_C( -372.57)) },
5015     { simde_mm_set_pd(SIMDE_FLOAT64_C( -791.37), SIMDE_FLOAT64_C(  840.72)),
5016       simde_mm_set_pd(SIMDE_FLOAT64_C( -365.40), SIMDE_FLOAT64_C( -868.73)),
5017       simde_mm_set_pd(SIMDE_FLOAT64_C( -365.40), SIMDE_FLOAT64_C(  840.72)) },
5018     { simde_mm_set_pd(SIMDE_FLOAT64_C(  169.29), SIMDE_FLOAT64_C(  679.66)),
5019       simde_mm_set_pd(SIMDE_FLOAT64_C(  -57.82), SIMDE_FLOAT64_C(  810.96)),
5020       simde_mm_set_pd(SIMDE_FLOAT64_C(  169.29), SIMDE_FLOAT64_C(  810.96)) },
5021     { simde_mm_set_pd(SIMDE_FLOAT64_C(  713.62), SIMDE_FLOAT64_C(  124.72)),
5022       simde_mm_set_pd(SIMDE_FLOAT64_C( -297.75), SIMDE_FLOAT64_C(  146.63)),
5023       simde_mm_set_pd(SIMDE_FLOAT64_C(  713.62), SIMDE_FLOAT64_C(  146.63)) },
5024     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -67.07), SIMDE_FLOAT64_C( -514.59)),
5025       simde_mm_set_pd(SIMDE_FLOAT64_C(  577.06), SIMDE_FLOAT64_C( -935.01)),
5026       simde_mm_set_pd(SIMDE_FLOAT64_C(  577.06), SIMDE_FLOAT64_C( -514.59)) },
5027     { simde_mm_set_pd(SIMDE_FLOAT64_C(  342.23), SIMDE_FLOAT64_C(  198.10)),
5028       simde_mm_set_pd(SIMDE_FLOAT64_C( -401.56), SIMDE_FLOAT64_C( -707.36)),
5029       simde_mm_set_pd(SIMDE_FLOAT64_C(  342.23), SIMDE_FLOAT64_C(  198.10)) }
5030   };
5031 
5032   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5033     simde__m128d r = simde_mm_max_pd(test_vec[i].a, test_vec[i].b);
5034     simde_assert_m128d_close(r, test_vec[i].r, 1);
5035   }
5036 
5037   return 0;
5038 }
5039 
5040 static int
5041 test_simde_mm_max_sd(SIMDE_MUNIT_TEST_ARGS) {
5042   const struct {
5043     simde__m128d a;
5044     simde__m128d b;
5045     simde__m128d r;
5046   } test_vec[8] = {
5047     { simde_mm_set_pd(SIMDE_FLOAT64_C( -303.58), SIMDE_FLOAT64_C( -480.90)),
5048       simde_mm_set_pd(SIMDE_FLOAT64_C(  319.11), SIMDE_FLOAT64_C(  666.53)),
5049       simde_mm_set_pd(SIMDE_FLOAT64_C( -303.58), SIMDE_FLOAT64_C(  666.53)) },
5050     { simde_mm_set_pd(SIMDE_FLOAT64_C( -541.77), SIMDE_FLOAT64_C(  944.47)),
5051       simde_mm_set_pd(SIMDE_FLOAT64_C(  -53.88), SIMDE_FLOAT64_C(  845.28)),
5052       simde_mm_set_pd(SIMDE_FLOAT64_C( -541.77), SIMDE_FLOAT64_C(  944.47)) },
5053     { simde_mm_set_pd(SIMDE_FLOAT64_C( -612.90), SIMDE_FLOAT64_C( -610.67)),
5054       simde_mm_set_pd(SIMDE_FLOAT64_C(  230.96), SIMDE_FLOAT64_C( -372.57)),
5055       simde_mm_set_pd(SIMDE_FLOAT64_C( -612.90), SIMDE_FLOAT64_C( -372.57)) },
5056     { simde_mm_set_pd(SIMDE_FLOAT64_C( -791.37), SIMDE_FLOAT64_C(  840.72)),
5057       simde_mm_set_pd(SIMDE_FLOAT64_C( -365.40), SIMDE_FLOAT64_C( -868.73)),
5058       simde_mm_set_pd(SIMDE_FLOAT64_C( -791.37), SIMDE_FLOAT64_C(  840.72)) },
5059     { simde_mm_set_pd(SIMDE_FLOAT64_C(  169.29), SIMDE_FLOAT64_C(  679.66)),
5060       simde_mm_set_pd(SIMDE_FLOAT64_C(  -57.82), SIMDE_FLOAT64_C(  810.96)),
5061       simde_mm_set_pd(SIMDE_FLOAT64_C(  169.29), SIMDE_FLOAT64_C(  810.96)) },
5062     { simde_mm_set_pd(SIMDE_FLOAT64_C(  713.62), SIMDE_FLOAT64_C(  124.72)),
5063       simde_mm_set_pd(SIMDE_FLOAT64_C( -297.75), SIMDE_FLOAT64_C(  146.63)),
5064       simde_mm_set_pd(SIMDE_FLOAT64_C(  713.62), SIMDE_FLOAT64_C(  146.63)) },
5065     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -67.07), SIMDE_FLOAT64_C( -514.59)),
5066       simde_mm_set_pd(SIMDE_FLOAT64_C(  577.06), SIMDE_FLOAT64_C( -935.01)),
5067       simde_mm_set_pd(SIMDE_FLOAT64_C(  -67.07), SIMDE_FLOAT64_C( -514.59)) },
5068     { simde_mm_set_pd(SIMDE_FLOAT64_C(  342.23), SIMDE_FLOAT64_C(  198.10)),
5069       simde_mm_set_pd(SIMDE_FLOAT64_C( -401.56), SIMDE_FLOAT64_C( -707.36)),
5070       simde_mm_set_pd(SIMDE_FLOAT64_C(  342.23), SIMDE_FLOAT64_C(  198.10)) }
5071   };
5072 
5073   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5074     simde__m128d r = simde_mm_max_sd(test_vec[i].a, test_vec[i].b);
5075     simde_assert_m128d_close(r, test_vec[i].r, 1);
5076   }
5077 
5078   return 0;
5079 }
5080 
5081 static int
5082 test_simde_mm_move_epi64(SIMDE_MUNIT_TEST_ARGS) {
5083   const struct {
5084     simde__m128i a;
5085     simde__m128i r;
5086   } test_vec[8] = {
5087     { simde_mm_set_epi64x(INT64_C(-2982745844705455901), INT64_C( 4775804171231816037)),
5088       simde_mm_set_epi64x(INT64_C(                   0), INT64_C( 4775804171231816037)) },
5089     { simde_mm_set_epi64x(INT64_C( 5762346410957661033), INT64_C( 2977172799723381810)),
5090       simde_mm_set_epi64x(INT64_C(                   0), INT64_C( 2977172799723381810)) },
5091     { simde_mm_set_epi64x(INT64_C( 1008079402021318109), INT64_C( 2502061726771043310)),
5092       simde_mm_set_epi64x(INT64_C(                   0), INT64_C( 2502061726771043310)) },
5093     { simde_mm_set_epi64x(INT64_C( 5339677830223010942), INT64_C( 8124798084034539527)),
5094       simde_mm_set_epi64x(INT64_C(                   0), INT64_C( 8124798084034539527)) },
5095     { simde_mm_set_epi64x(INT64_C(-4521066662096167363), INT64_C( -947809468227977762)),
5096       simde_mm_set_epi64x(INT64_C(                   0), INT64_C( -947809468227977762)) },
5097     { simde_mm_set_epi64x(INT64_C(-1218747510360922612), INT64_C( 3122441631876631480)),
5098       simde_mm_set_epi64x(INT64_C(                   0), INT64_C( 3122441631876631480)) },
5099     { simde_mm_set_epi64x(INT64_C(-3523922424397514946), INT64_C( -108841976580709576)),
5100       simde_mm_set_epi64x(INT64_C(                   0), INT64_C( -108841976580709576)) },
5101     { simde_mm_set_epi64x(INT64_C(-3961221708434347271), INT64_C(-1875395594913971276)),
5102       simde_mm_set_epi64x(INT64_C(                   0), INT64_C(-1875395594913971276)) }
5103   };
5104 
5105   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5106     simde__m128i r = simde_mm_move_epi64(test_vec[i].a);
5107     simde_assert_m128i_i64(r, ==, test_vec[i].r);
5108   }
5109 
5110   return 0;
5111 }
5112 
5113 static int
5114 test_simde_mm_move_sd(SIMDE_MUNIT_TEST_ARGS) {
5115   const struct {
5116     simde__m128d a;
5117     simde__m128d b;
5118     simde__m128d r;
5119   } test_vec[8] = {
5120     { simde_mm_set_pd(SIMDE_FLOAT64_C(  570.81), SIMDE_FLOAT64_C( -941.79)),
5121       simde_mm_set_pd(SIMDE_FLOAT64_C( -875.32), SIMDE_FLOAT64_C( -356.30)),
5122       simde_mm_set_pd(SIMDE_FLOAT64_C(  570.81), SIMDE_FLOAT64_C( -356.30)) },
5123     { simde_mm_set_pd(SIMDE_FLOAT64_C( -816.08), SIMDE_FLOAT64_C(  126.72)),
5124       simde_mm_set_pd(SIMDE_FLOAT64_C( -886.56), SIMDE_FLOAT64_C(  985.06)),
5125       simde_mm_set_pd(SIMDE_FLOAT64_C( -816.08), SIMDE_FLOAT64_C(  985.06)) },
5126     { simde_mm_set_pd(SIMDE_FLOAT64_C(  358.01), SIMDE_FLOAT64_C(   59.13)),
5127       simde_mm_set_pd(SIMDE_FLOAT64_C(  -61.40), SIMDE_FLOAT64_C( -717.39)),
5128       simde_mm_set_pd(SIMDE_FLOAT64_C(  358.01), SIMDE_FLOAT64_C( -717.39)) },
5129     { simde_mm_set_pd(SIMDE_FLOAT64_C(  110.04), SIMDE_FLOAT64_C(  -44.09)),
5130       simde_mm_set_pd(SIMDE_FLOAT64_C( -614.96), SIMDE_FLOAT64_C( -267.93)),
5131       simde_mm_set_pd(SIMDE_FLOAT64_C(  110.04), SIMDE_FLOAT64_C( -267.93)) },
5132     { simde_mm_set_pd(SIMDE_FLOAT64_C(  571.79), SIMDE_FLOAT64_C( -816.23)),
5133       simde_mm_set_pd(SIMDE_FLOAT64_C(  917.45), SIMDE_FLOAT64_C(  287.41)),
5134       simde_mm_set_pd(SIMDE_FLOAT64_C(  571.79), SIMDE_FLOAT64_C(  287.41)) },
5135     { simde_mm_set_pd(SIMDE_FLOAT64_C(  827.50), SIMDE_FLOAT64_C(  261.09)),
5136       simde_mm_set_pd(SIMDE_FLOAT64_C(  478.77), SIMDE_FLOAT64_C(   33.99)),
5137       simde_mm_set_pd(SIMDE_FLOAT64_C(  827.50), SIMDE_FLOAT64_C(   33.99)) },
5138     { simde_mm_set_pd(SIMDE_FLOAT64_C( -335.82), SIMDE_FLOAT64_C(  465.36)),
5139       simde_mm_set_pd(SIMDE_FLOAT64_C( -993.24), SIMDE_FLOAT64_C(  100.89)),
5140       simde_mm_set_pd(SIMDE_FLOAT64_C( -335.82), SIMDE_FLOAT64_C(  100.89)) },
5141     { simde_mm_set_pd(SIMDE_FLOAT64_C(  415.58), SIMDE_FLOAT64_C( -984.83)),
5142       simde_mm_set_pd(SIMDE_FLOAT64_C(  764.57), SIMDE_FLOAT64_C(  672.72)),
5143       simde_mm_set_pd(SIMDE_FLOAT64_C(  415.58), SIMDE_FLOAT64_C(  672.72)) }
5144   };
5145 
5146   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5147     simde__m128d r = simde_mm_move_sd(test_vec[i].a, test_vec[i].b);
5148     simde_assert_m128d_close(r, test_vec[i].r, 1);
5149   }
5150 
5151   return 0;
5152 }
5153 
5154 static int
5155 test_simde_mm_movemask_epi8(SIMDE_MUNIT_TEST_ARGS) {
5156   const struct {
5157     simde__m128i a;
5158     int32_t r;
5159   } test_vec[] = {
5160     { simde_mm_set_epi8(INT8_C(-125), INT8_C( -40), INT8_C(  -7), INT8_C( -71),
5161                         INT8_C( -75), INT8_C(  99), INT8_C( 101), INT8_C(  -5),
5162                         INT8_C( -71), INT8_C( -91), INT8_C( -60), INT8_C(   9),
5163                         INT8_C( -27), INT8_C( -81), INT8_C(   5), INT8_C(  97)),
5164       INT32_C(63980) },
5165     { simde_mm_set_epi8(INT8_C(  44), INT8_C( -98), INT8_C(  82), INT8_C(-127),
5166                         INT8_C( -28), INT8_C( 122), INT8_C( -22), INT8_C(  46),
5167                         INT8_C( -41), INT8_C( -35), INT8_C(  15), INT8_C(  43),
5168                         INT8_C( -37), INT8_C( -12), INT8_C(  17), INT8_C( -17)),
5169       INT32_C(23245) },
5170     { simde_mm_set_epi8(INT8_C( -53), INT8_C( -99), INT8_C(  91), INT8_C( -56),
5171                         INT8_C(  10), INT8_C( 114), INT8_C(-120), INT8_C(  67),
5172                         INT8_C( -82), INT8_C(  13), INT8_C( 104), INT8_C(   1),
5173                         INT8_C(  15), INT8_C(-115), INT8_C(  16), INT8_C(  33)),
5174       INT32_C(53892) },
5175     { simde_mm_set_epi8(INT8_C( 109), INT8_C(  -5), INT8_C( -45), INT8_C(  60),
5176                         INT8_C( -20), INT8_C(  -7), INT8_C( -24), INT8_C(  63),
5177                         INT8_C(  61), INT8_C( -94), INT8_C(-110), INT8_C(  16),
5178                         INT8_C( 117), INT8_C( -23), INT8_C( -49), INT8_C( -74)),
5179       INT32_C(28263) },
5180     { simde_mm_set_epi8(INT8_C( -88), INT8_C( 110), INT8_C(-108), INT8_C( -88),
5181                         INT8_C(  28), INT8_C( 110), INT8_C(   0), INT8_C( -12),
5182                         INT8_C( -90), INT8_C(  44), INT8_C( -42), INT8_C( -87),
5183                         INT8_C( -48), INT8_C( -87), INT8_C( -21), INT8_C( -64)),
5184       INT32_C(45503) },
5185     { simde_mm_set_epi8(INT8_C( 121), INT8_C(-111), INT8_C(  -1), INT8_C( -61),
5186                         INT8_C(  67), INT8_C(  90), INT8_C(  10), INT8_C(  65),
5187                         INT8_C(  36), INT8_C( -60), INT8_C(  93), INT8_C(  -3),
5188                         INT8_C(-112), INT8_C(  -8), INT8_C(  55), INT8_C( -49)),
5189       INT32_C(28765) },
5190     { simde_mm_set_epi8(INT8_C(  92), INT8_C( -27), INT8_C(  37), INT8_C( -87),
5191                         INT8_C(  58), INT8_C( 108), INT8_C( -50), INT8_C( -10),
5192                         INT8_C(   5), INT8_C(  21), INT8_C(  14), INT8_C(  72),
5193                         INT8_C( -76), INT8_C(  21), INT8_C(-104), INT8_C( 110)),
5194       INT32_C(21258) },
5195     { simde_mm_set_epi8(INT8_C( -60), INT8_C(  23), INT8_C( -54), INT8_C(  54),
5196                         INT8_C(  31), INT8_C(  13), INT8_C( -93), INT8_C(  18),
5197                         INT8_C( -62), INT8_C(-128), INT8_C(  70), INT8_C(  59),
5198                         INT8_C(  17), INT8_C(  49), INT8_C(  95), INT8_C( -96)),
5199       INT32_C(41665) }
5200   };
5201 
5202   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5203     int32_t r = simde_mm_movemask_epi8(test_vec[i].a);
5204     simde_assert_equal_i32(r, test_vec[i].r);
5205   }
5206 
5207   return 0;
5208 }
5209 
5210 static int
5211 test_simde_mm_movemask_pd(SIMDE_MUNIT_TEST_ARGS) {
5212   const struct {
5213     simde__m128d a;
5214     int32_t r;
5215   } test_vec[8] = {
5216     { simde_mm_set_pd(SIMDE_FLOAT64_C( -532.45), SIMDE_FLOAT64_C(  863.01)),
5217       INT32_C(2) },
5218     { simde_mm_set_pd(SIMDE_FLOAT64_C( -749.24), SIMDE_FLOAT64_C( -869.97)),
5219       INT32_C(3) },
5220     { simde_mm_set_pd(SIMDE_FLOAT64_C(  994.23), SIMDE_FLOAT64_C(  351.47)),
5221       INT32_C(0) },
5222     { simde_mm_set_pd(SIMDE_FLOAT64_C(  413.53), SIMDE_FLOAT64_C( -655.32)),
5223       INT32_C(1) },
5224     { simde_mm_set_pd(SIMDE_FLOAT64_C( -253.74), SIMDE_FLOAT64_C(   -2.37)),
5225       INT32_C(3) },
5226     { simde_mm_set_pd(SIMDE_FLOAT64_C(  696.22), SIMDE_FLOAT64_C( -699.75)),
5227       INT32_C(1) },
5228     { simde_mm_set_pd(SIMDE_FLOAT64_C(   55.24), SIMDE_FLOAT64_C( -722.45)),
5229       INT32_C(1) },
5230     { simde_mm_set_pd(SIMDE_FLOAT64_C( -592.60), SIMDE_FLOAT64_C(  141.10)),
5231       INT32_C(2) }
5232   };
5233 
5234   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5235     int32_t r = simde_mm_movemask_pd(test_vec[i].a);
5236     simde_assert_equal_i32(r, test_vec[i].r);
5237   }
5238 
5239   return 0;
5240 }
5241 
5242 static int
5243 test_simde_mm_movepi64_pi64(SIMDE_MUNIT_TEST_ARGS) {
5244   const struct {
5245     simde__m128i a;
5246     simde__m64 r;
5247   } test_vec[8] = {
5248     { simde_mm_set_epi64x(INT64_C(-3369091548753669372), INT64_C(-3862634862308997761)),
5249       simde_x_mm_set_pi64(INT64_C(-3862634862308997761)) },
5250     { simde_mm_set_epi64x(INT64_C( 1195923961730132400), INT64_C(-4203048506958717476)),
5251       simde_x_mm_set_pi64(INT64_C(-4203048506958717476)) },
5252     { simde_mm_set_epi64x(INT64_C( 4316262850566382732), INT64_C(  743544812785944809)),
5253       simde_x_mm_set_pi64(INT64_C(743544812785944809)) },
5254     { simde_mm_set_epi64x(INT64_C( 9153964415619232912), INT64_C( 7102186508934354546)),
5255       simde_x_mm_set_pi64(INT64_C(7102186508934354546)) },
5256     { simde_mm_set_epi64x(INT64_C( -149536427124813706), INT64_C(-2645616526676309339)),
5257       simde_x_mm_set_pi64(INT64_C(-2645616526676309339)) },
5258     { simde_mm_set_epi64x(INT64_C( 7660292028637459230), INT64_C(-4472173852492382560)),
5259       simde_x_mm_set_pi64(INT64_C(-4472173852492382560)) },
5260     { simde_mm_set_epi64x(INT64_C( 2373412759770157312), INT64_C( -249935199655019513)),
5261       simde_x_mm_set_pi64(INT64_C(-249935199655019513)) },
5262     { simde_mm_set_epi64x(INT64_C(-2495482311671930573), INT64_C( 7782795372632782061)),
5263       simde_x_mm_set_pi64(INT64_C(7782795372632782061)) }
5264   };
5265 
5266   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5267     simde__m64 r = simde_mm_movepi64_pi64(test_vec[i].a);
5268     simde_assert_m64_i64(r, ==, test_vec[i].r);
5269   }
5270 
5271   return 0;
5272 }
5273 
5274 static int
5275 test_simde_mm_movpi64_epi64(SIMDE_MUNIT_TEST_ARGS) {
5276   const struct {
5277     simde__m64 a;
5278     simde__m128i r;
5279   } test_vec[8] = {
5280    { simde_x_mm_set_pi64(INT64_C(8307669974137432024)),
5281       simde_mm_set_epi64x(INT64_C(                   0), INT64_C( 8307669974137432024)) },
5282     { simde_x_mm_set_pi64(INT64_C(-6174863101947913477)),
5283       simde_mm_set_epi64x(INT64_C(                   0), INT64_C(-6174863101947913477)) },
5284     { simde_x_mm_set_pi64(INT64_C(-3709498539865079997)),
5285       simde_mm_set_epi64x(INT64_C(                   0), INT64_C(-3709498539865079997)) },
5286     { simde_x_mm_set_pi64(INT64_C(-5655514474221449119)),
5287       simde_mm_set_epi64x(INT64_C(                   0), INT64_C(-5655514474221449119)) },
5288     { simde_x_mm_set_pi64(INT64_C(-4407711847161442183)),
5289       simde_mm_set_epi64x(INT64_C(                   0), INT64_C(-4407711847161442183)) },
5290     { simde_x_mm_set_pi64(INT64_C(-7730135383563833284)),
5291       simde_mm_set_epi64x(INT64_C(                   0), INT64_C(-7730135383563833284)) },
5292     { simde_x_mm_set_pi64(INT64_C(1417829150564629578)),
5293       simde_mm_set_epi64x(INT64_C(                   0), INT64_C( 1417829150564629578)) },
5294     { simde_x_mm_set_pi64(INT64_C(5667864625160412978)),
5295       simde_mm_set_epi64x(INT64_C(                   0), INT64_C( 5667864625160412978)) }
5296   };
5297 
5298   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5299     simde__m128i r = simde_mm_movpi64_epi64(test_vec[i].a);
5300     simde_assert_m128i_i64(r, ==, test_vec[i].r);
5301   }
5302 
5303   return 0;
5304 }
5305 
5306 static int
5307 test_simde_mm_mul_epu32(SIMDE_MUNIT_TEST_ARGS) {
5308   const struct {
5309     simde__m128i a;
5310     simde__m128i b;
5311     simde__m128i r;
5312   } test_vec[8] = {
5313     { simde_x_mm_set_epu32 (UINT32_C(1251634950), UINT32_C( 3010014325), UINT32_C( 3844926313), UINT32_C(  887004237)),
5314       simde_x_mm_set_epu32 (UINT32_C( 771412494), UINT32_C(  328239887), UINT32_C( 3360452571), UINT32_C(  298292680)),
5315       simde_x_mm_set_epu64x(UINT64_C(  988006761906381275), UINT64_C(  264586871026085160)) },
5316     { simde_x_mm_set_epu32 (UINT32_C(4076207020), UINT32_C(  368393187), UINT32_C( 3498508084), UINT32_C(  981141316)),
5317       simde_x_mm_set_epu32 (UINT32_C(  81654802), UINT32_C(  140153335), UINT32_C( 2110173535), UINT32_C( 3134478151)),
5318       simde_x_mm_set_epu64x(UINT64_C(   51631533749328645), UINT64_C( 3075366018045386716)) },
5319     { simde_x_mm_set_epu32 (UINT32_C(4261415154), UINT32_C(  846454649), UINT32_C(  108194122), UINT32_C( 4167432393)),
5320       simde_x_mm_set_epu32 (UINT32_C(2698880481), UINT32_C( 1287129030), UINT32_C( 2616406220), UINT32_C( 1248265871)),
5321       simde_x_mm_set_epu64x(UINT64_C( 1089496351306360470), UINT64_C( 5202063625881759303)) },
5322     { simde_x_mm_set_epu32 (UINT32_C( 911193301), UINT32_C( 1110766386), UINT32_C( 3009613617), UINT32_C( 1645784878)),
5323       simde_x_mm_set_epu32 (UINT32_C(3094480659), UINT32_C( 3697181600), UINT32_C( 4236850839), UINT32_C( 2133678416)),
5324       simde_x_mm_set_epu64x(UINT64_C( 4106705044217697600), UINT64_C( 3511575671567793248)) },
5325     { simde_x_mm_set_epu32 (UINT32_C(2154112155), UINT32_C( 2960710803), UINT32_C( 2851801912), UINT32_C(  678710951)),
5326       simde_x_mm_set_epu32 (UINT32_C(4001207654), UINT32_C( 4056994829), UINT32_C( 1341523746), UINT32_C(  568161818)),
5327       simde_x_mm_set_epu64x(UINT64_C(12011588417935437687), UINT64_C(  385617647816668918)) },
5328     { simde_x_mm_set_epu32 (UINT32_C(3273494172), UINT32_C( 3612698350), UINT32_C( 4103906203), UINT32_C( 1678207566)),
5329       simde_x_mm_set_epu32 (UINT32_C(  48120942), UINT32_C(  160747207), UINT32_C( 2820564214), UINT32_C( 1404181744)),
5330       simde_x_mm_set_epu64x(UINT64_C(  580731169496008450), UINT64_C( 2356508426819875104)) },
5331     { simde_x_mm_set_epu32 (UINT32_C( 396392525), UINT32_C( 2486526122), UINT32_C( 1177281917), UINT32_C( 3038155803)),
5332       simde_x_mm_set_epu32 (UINT32_C( 760783698), UINT32_C( 1253190575), UINT32_C( 4064848310), UINT32_C( 1630883223)),
5333       simde_x_mm_set_epu64x(UINT64_C( 3116091100581700150), UINT64_C( 4954877327972793069)) },
5334     { simde_x_mm_set_epu32 (UINT32_C(1438827395), UINT32_C( 1294325524), UINT32_C( 3245229436), UINT32_C(  122146781)),
5335       simde_x_mm_set_epu32 (UINT32_C(1030238038), UINT32_C( 4273209339), UINT32_C(  197838277), UINT32_C( 1151380764)),
5336       simde_x_mm_set_epu64x(UINT64_C( 5530923916862868636), UINT64_C(  140637454027920684)) }
5337   };
5338 
5339   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5340     simde__m128i r = simde_mm_mul_epu32(test_vec[i].a, test_vec[i].b);
5341     simde_assert_m128i_u64(r, ==, test_vec[i].r);
5342   }
5343 
5344   return 0;
5345 }
5346 
5347 static int
5348 test_simde_mm_mul_pd(SIMDE_MUNIT_TEST_ARGS) {
5349   const struct {
5350     simde__m128d a;
5351     simde__m128d b;
5352     simde__m128d r;
5353   } test_vec[8] = {
5354     { simde_mm_set_pd(SIMDE_FLOAT64_C(    602.19), SIMDE_FLOAT64_C(   -103.53)),
5355       simde_mm_set_pd(SIMDE_FLOAT64_C(   -952.79), SIMDE_FLOAT64_C(   -150.84)),
5356       simde_mm_set_pd(SIMDE_FLOAT64_C(-573760.61), SIMDE_FLOAT64_C(  15616.47)) },
5357     { simde_mm_set_pd(SIMDE_FLOAT64_C(   -140.84), SIMDE_FLOAT64_C(   -241.95)),
5358       simde_mm_set_pd(SIMDE_FLOAT64_C(    540.86), SIMDE_FLOAT64_C(   -754.39)),
5359       simde_mm_set_pd(SIMDE_FLOAT64_C( -76174.72), SIMDE_FLOAT64_C( 182524.66)) },
5360     { simde_mm_set_pd(SIMDE_FLOAT64_C(   -648.21), SIMDE_FLOAT64_C(   -612.11)),
5361       simde_mm_set_pd(SIMDE_FLOAT64_C(   -327.08), SIMDE_FLOAT64_C(   -865.34)),
5362       simde_mm_set_pd(SIMDE_FLOAT64_C( 212016.53), SIMDE_FLOAT64_C( 529683.27)) },
5363     { simde_mm_set_pd(SIMDE_FLOAT64_C(    518.61), SIMDE_FLOAT64_C(   -573.43)),
5364       simde_mm_set_pd(SIMDE_FLOAT64_C(   -650.79), SIMDE_FLOAT64_C(    196.03)),
5365       simde_mm_set_pd(SIMDE_FLOAT64_C(-337506.20), SIMDE_FLOAT64_C(-112409.48)) },
5366     { simde_mm_set_pd(SIMDE_FLOAT64_C(   -750.40), SIMDE_FLOAT64_C(    324.63)),
5367       simde_mm_set_pd(SIMDE_FLOAT64_C(    343.74), SIMDE_FLOAT64_C(     -4.14)),
5368       simde_mm_set_pd(SIMDE_FLOAT64_C(-257942.50), SIMDE_FLOAT64_C(  -1343.97)) },
5369     { simde_mm_set_pd(SIMDE_FLOAT64_C(    -48.73), SIMDE_FLOAT64_C(    769.19)),
5370       simde_mm_set_pd(SIMDE_FLOAT64_C(    268.16), SIMDE_FLOAT64_C(   -953.46)),
5371       simde_mm_set_pd(SIMDE_FLOAT64_C( -13067.44), SIMDE_FLOAT64_C(-733391.90)) },
5372     { simde_mm_set_pd(SIMDE_FLOAT64_C(    188.02), SIMDE_FLOAT64_C(    614.87)),
5373       simde_mm_set_pd(SIMDE_FLOAT64_C(    396.91), SIMDE_FLOAT64_C(   -399.68)),
5374       simde_mm_set_pd(SIMDE_FLOAT64_C(  74627.02), SIMDE_FLOAT64_C(-245751.24)) },
5375     { simde_mm_set_pd(SIMDE_FLOAT64_C(    813.52), SIMDE_FLOAT64_C(    480.96)),
5376       simde_mm_set_pd(SIMDE_FLOAT64_C(    664.31), SIMDE_FLOAT64_C(    447.07)),
5377       simde_mm_set_pd(SIMDE_FLOAT64_C( 540429.47), SIMDE_FLOAT64_C( 215022.79)) }
5378   };
5379 
5380   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5381     simde__m128d r = simde_mm_mul_pd(test_vec[i].a, test_vec[i].b);
5382     simde_assert_m128d_close(r, test_vec[i].r, 1);
5383   }
5384 
5385   return 0;
5386 }
5387 
5388 static int
5389 test_simde_mm_mul_sd(SIMDE_MUNIT_TEST_ARGS) {
5390   const struct {
5391     simde__m128d a;
5392     simde__m128d b;
5393     simde__m128d r;
5394   } test_vec[8] = {
5395     { simde_mm_set_pd(SIMDE_FLOAT64_C(    815.66), SIMDE_FLOAT64_C(    839.23)),
5396       simde_mm_set_pd(SIMDE_FLOAT64_C(    748.66), SIMDE_FLOAT64_C(    -52.12)),
5397       simde_mm_set_pd(SIMDE_FLOAT64_C(    815.66), SIMDE_FLOAT64_C( -43740.67)) },
5398     { simde_mm_set_pd(SIMDE_FLOAT64_C(   -714.36), SIMDE_FLOAT64_C(   -808.00)),
5399       simde_mm_set_pd(SIMDE_FLOAT64_C(    401.75), SIMDE_FLOAT64_C(    319.13)),
5400       simde_mm_set_pd(SIMDE_FLOAT64_C(   -714.36), SIMDE_FLOAT64_C(-257857.04)) },
5401     { simde_mm_set_pd(SIMDE_FLOAT64_C(    453.36), SIMDE_FLOAT64_C(   -764.44)),
5402       simde_mm_set_pd(SIMDE_FLOAT64_C(   -934.41), SIMDE_FLOAT64_C(   -454.88)),
5403       simde_mm_set_pd(SIMDE_FLOAT64_C(    453.36), SIMDE_FLOAT64_C( 347728.47)) },
5404     { simde_mm_set_pd(SIMDE_FLOAT64_C(    489.60), SIMDE_FLOAT64_C(    724.31)),
5405       simde_mm_set_pd(SIMDE_FLOAT64_C(   -101.25), SIMDE_FLOAT64_C(    196.93)),
5406       simde_mm_set_pd(SIMDE_FLOAT64_C(    489.60), SIMDE_FLOAT64_C( 142638.37)) },
5407     { simde_mm_set_pd(SIMDE_FLOAT64_C(    187.03), SIMDE_FLOAT64_C(    665.07)),
5408       simde_mm_set_pd(SIMDE_FLOAT64_C(    487.15), SIMDE_FLOAT64_C(    851.16)),
5409       simde_mm_set_pd(SIMDE_FLOAT64_C(    187.03), SIMDE_FLOAT64_C( 566080.98)) },
5410     { simde_mm_set_pd(SIMDE_FLOAT64_C(    589.48), SIMDE_FLOAT64_C(    648.27)),
5411       simde_mm_set_pd(SIMDE_FLOAT64_C(   -683.48), SIMDE_FLOAT64_C(    -59.67)),
5412       simde_mm_set_pd(SIMDE_FLOAT64_C(    589.48), SIMDE_FLOAT64_C( -38682.27)) },
5413     { simde_mm_set_pd(SIMDE_FLOAT64_C(    838.61), SIMDE_FLOAT64_C(    822.18)),
5414       simde_mm_set_pd(SIMDE_FLOAT64_C(   -364.43), SIMDE_FLOAT64_C(    962.26)),
5415       simde_mm_set_pd(SIMDE_FLOAT64_C(    838.61), SIMDE_FLOAT64_C( 791150.93)) },
5416     { simde_mm_set_pd(SIMDE_FLOAT64_C(    530.83), SIMDE_FLOAT64_C(    379.76)),
5417       simde_mm_set_pd(SIMDE_FLOAT64_C(     27.92), SIMDE_FLOAT64_C(    -56.09)),
5418       simde_mm_set_pd(SIMDE_FLOAT64_C(    530.83), SIMDE_FLOAT64_C( -21300.74)) }
5419   };
5420 
5421   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5422     simde__m128d r = simde_mm_mul_sd(test_vec[i].a, test_vec[i].b);
5423     simde_assert_m128d_close(r, test_vec[i].r, 1);
5424   }
5425 
5426   return 0;
5427 }
5428 
5429 static int
5430 test_simde_mm_mul_su32(SIMDE_MUNIT_TEST_ARGS) {
5431   const struct {
5432     simde__m64 a;
5433     simde__m64 b;
5434     simde__m64 r;
5435   } test_vec[8] = {
5436     { simde_x_mm_set_pu32(UINT32_C(3055040779), UINT32_C( 899100968)),
5437       simde_x_mm_set_pu32(UINT32_C(1940650668), UINT32_C(3777451497)),
5438       simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 3396310297525749096))) },
5439     { simde_x_mm_set_pu32(UINT32_C(2705843438), UINT32_C(2434885276)),
5440       simde_x_mm_set_pu32(UINT32_C(3024316392), UINT32_C(3861898348)),
5441       simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 9403279424953924048))) },
5442     { simde_x_mm_set_pu32(UINT32_C(3766308026), UINT32_C(1712773120)),
5443       simde_x_mm_set_pu32(UINT32_C( 817218479), UINT32_C(3651399110)),
5444       simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 6254018245999923200))) },
5445     { simde_x_mm_set_pu32(UINT32_C( 434012470), UINT32_C(1054365092)),
5446       simde_x_mm_set_pu32(UINT32_C(2682784668), UINT32_C(2536059630)),
5447       simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 2673932745102435960))) },
5448     { simde_x_mm_set_pu32(UINT32_C(3086788421), UINT32_C( 996821946)),
5449       simde_x_mm_set_pu32(UINT32_C(3201780597), UINT32_C(3958985305)),
5450       simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 3946403435915503530))) },
5451     { simde_x_mm_set_pu32(UINT32_C(3277786031), UINT32_C(4257890741)),
5452       simde_x_mm_set_pu32(UINT32_C(1195509971), UINT32_C(2579552899)),
5453       simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C(10983454404571808159))) },
5454     { simde_x_mm_set_pu32(UINT32_C(3106450314), UINT32_C(1125697671)),
5455       simde_x_mm_set_pu32(UINT32_C(2878635182), UINT32_C(3892244414)),
5456       simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 4381490471802559794))) },
5457     { simde_x_mm_set_pu32(UINT32_C(2670515723), UINT32_C(3917703761)),
5458       simde_x_mm_set_pu32(UINT32_C(3656211314), UINT32_C(2327792170)),
5459       simde_mm_cvtsi64_m64(HEDLEY_STATIC_CAST(int64_t, UINT64_C( 9119600139235351370))) }
5460   };
5461 
5462   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5463     simde__m64 r = simde_mm_mul_su32(test_vec[i].a, test_vec[i].b);
5464     simde_assert_m64_u64(r, ==, test_vec[i].r);
5465   }
5466 
5467   return 0;
5468 }
5469 
5470 static int
5471 test_simde_mm_mulhi_epi16(SIMDE_MUNIT_TEST_ARGS) {
5472   const struct {
5473     simde__m128i a;
5474     simde__m128i b;
5475     simde__m128i r;
5476   } test_vec[8] = {
5477     { simde_mm_set_epi16(INT16_C(-28198), INT16_C(-30713), INT16_C( 20992), INT16_C(-15285),
5478                          INT16_C( 16558), INT16_C(-12771), INT16_C(-10872), INT16_C(-32584)),
5479       simde_mm_set_epi16(INT16_C( -2396), INT16_C(-16729), INT16_C( 31162), INT16_C(-10205),
5480                          INT16_C( 24928), INT16_C(  5223), INT16_C(  7262), INT16_C( 25352)),
5481       simde_mm_set_epi16(INT16_C(  1030), INT16_C(  7839), INT16_C(  9981), INT16_C(  2380),
5482                          INT16_C(  6298), INT16_C( -1018), INT16_C( -1205), INT16_C(-12605)) },
5483     { simde_mm_set_epi16(INT16_C(-29475), INT16_C( -4667), INT16_C( 18782), INT16_C(-15431),
5484                          INT16_C(-27740), INT16_C( 28051), INT16_C(  4978), INT16_C(  1222)),
5485       simde_mm_set_epi16(INT16_C(-10541), INT16_C(-14468), INT16_C( 18685), INT16_C( 12375),
5486                          INT16_C( -5884), INT16_C(-11112), INT16_C( 23337), INT16_C( 12576)),
5487       simde_mm_set_epi16(INT16_C(  4740), INT16_C(  1030), INT16_C(  5354), INT16_C( -2914),
5488                          INT16_C(  2490), INT16_C( -4757), INT16_C(  1772), INT16_C(   234)) },
5489     { simde_mm_set_epi16(INT16_C( 27783), INT16_C(  6960), INT16_C( 17513), INT16_C( -7755),
5490                          INT16_C( 14695), INT16_C( 12404), INT16_C( -4129), INT16_C(-25366)),
5491       simde_mm_set_epi16(INT16_C( 29475), INT16_C( 25763), INT16_C( 29366), INT16_C( 12820),
5492                          INT16_C( -5355), INT16_C(  7751), INT16_C(-24426), INT16_C( -6617)),
5493       simde_mm_set_epi16(INT16_C( 12495), INT16_C(  2736), INT16_C(  7847), INT16_C( -1518),
5494                          INT16_C( -1201), INT16_C(  1467), INT16_C(  1538), INT16_C(  2561)) },
5495     { simde_mm_set_epi16(INT16_C(  8852), INT16_C( 11654), INT16_C( 12030), INT16_C( 21843),
5496                          INT16_C( 27012), INT16_C( 24122), INT16_C( -4121), INT16_C( 19864)),
5497       simde_mm_set_epi16(INT16_C(-24799), INT16_C(-30738), INT16_C( 19688), INT16_C(-21919),
5498                          INT16_C( 23874), INT16_C( -4632), INT16_C(-21648), INT16_C(-28317)),
5499       simde_mm_set_epi16(INT16_C( -3350), INT16_C( -5467), INT16_C(  3613), INT16_C( -7306),
5500                          INT16_C(  9840), INT16_C( -1705), INT16_C(  1361), INT16_C( -8583)) },
5501     { simde_mm_set_epi16(INT16_C(  2959), INT16_C(-18532), INT16_C(  4909), INT16_C( 17932),
5502                          INT16_C(  9150), INT16_C( 13660), INT16_C(-28547), INT16_C(  5006)),
5503       simde_mm_set_epi16(INT16_C( 16706), INT16_C(-30015), INT16_C(-32638), INT16_C( 13608),
5504                          INT16_C( -7846), INT16_C( 14914), INT16_C(-15409), INT16_C(-27711)),
5505       simde_mm_set_epi16(INT16_C(   754), INT16_C(  8487), INT16_C( -2445), INT16_C(  3723),
5506                          INT16_C( -1096), INT16_C(  3108), INT16_C(  6712), INT16_C( -2117)) },
5507     { simde_mm_set_epi16(INT16_C( 23854), INT16_C(-13644), INT16_C(-14015), INT16_C(-13375),
5508                          INT16_C(-26086), INT16_C( -6430), INT16_C( -5411), INT16_C(  7716)),
5509       simde_mm_set_epi16(INT16_C( -3281), INT16_C(-16733), INT16_C(-20310), INT16_C(   760),
5510                          INT16_C(-18586), INT16_C(  1673), INT16_C(-25298), INT16_C(-31758)),
5511       simde_mm_set_epi16(INT16_C( -1195), INT16_C(  3483), INT16_C(  4343), INT16_C(  -156),
5512                          INT16_C(  7397), INT16_C(  -165), INT16_C(  2088), INT16_C( -3740)) },
5513     { simde_mm_set_epi16(INT16_C(  5449), INT16_C(    38), INT16_C(  6018), INT16_C( 10627),
5514                          INT16_C( 20505), INT16_C( 28284), INT16_C(  4633), INT16_C(-26325)),
5515       simde_mm_set_epi16(INT16_C( 24784), INT16_C( 11314), INT16_C(  7455), INT16_C( 17813),
5516                          INT16_C( -6570), INT16_C(-17283), INT16_C( 30512), INT16_C(  2646)),
5517       simde_mm_set_epi16(INT16_C(  2060), INT16_C(     6), INT16_C(   684), INT16_C(  2888),
5518                          INT16_C( -2056), INT16_C( -7459), INT16_C(  2157), INT16_C( -1063)) },
5519     { simde_mm_set_epi16(INT16_C(-21624), INT16_C(  5121), INT16_C( 20041), INT16_C( 13722),
5520                          INT16_C(-24360), INT16_C(-19124), INT16_C(-16069), INT16_C( 19357)),
5521       simde_mm_set_epi16(INT16_C( -7842), INT16_C( 31372), INT16_C(-32681), INT16_C( 23520),
5522                          INT16_C( -3879), INT16_C( -7485), INT16_C( 22256), INT16_C( 12396)),
5523       simde_mm_set_epi16(INT16_C(  2587), INT16_C(  2451), INT16_C( -9994), INT16_C(  4924),
5524                          INT16_C(  1441), INT16_C(  2184), INT16_C( -5458), INT16_C(  3661)) }
5525   };
5526 
5527   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5528     simde__m128i r = simde_mm_mulhi_epi16(test_vec[i].a, test_vec[i].b);
5529     simde_assert_m128i_i16(r, ==, test_vec[i].r);
5530   }
5531 
5532   return 0;
5533 }
5534 
5535 static int
5536 test_simde_mm_mulhi_epu16(SIMDE_MUNIT_TEST_ARGS) {
5537   const struct {
5538     simde__m128i a;
5539     simde__m128i b;
5540     simde__m128i r;
5541   } test_vec[8] = {
5542     { simde_x_mm_set_epu16(35566, 15689, 63042, 57362, 59041, 31224, 19546, 12829),
5543       simde_x_mm_set_epu16(51447, 14621, 39095, 25022,  7138, 40387, 23161, 61024),
5544       simde_x_mm_set_epu16(27919,  3500, 37607, 21901,  6430, 19241,  6907, 11945) },
5545     { simde_x_mm_set_epu16(38922,  8893,  7997, 20067, 60307, 12929, 44791, 36818),
5546       simde_x_mm_set_epu16(56115, 46352, 39645, 27986, 64864, 64084,  5079, 17389),
5547       simde_x_mm_set_epu16(33326,  6289,  4837,  8569, 59688, 12642,  3471,  9769) },
5548     { simde_x_mm_set_epu16(15336, 63669, 63771, 21657, 12681, 61746,  3959, 20213),
5549       simde_x_mm_set_epu16(61649,  7462, 20857, 18418, 43120, 17135, 41045, 26167),
5550       simde_x_mm_set_epu16(14426,  7249, 20295,  6086,  8343, 16144,  2479,  8070) },
5551     { simde_x_mm_set_epu16(18737, 50787, 58977, 18610,  8077,  2942, 26014, 51355),
5552       simde_x_mm_set_epu16( 1776,  1953, 55756, 22299, 19400, 25284, 34496, 57058),
5553       simde_x_mm_set_epu16(  507,  1513, 50175,  6332,  2390,  1135, 13692, 44711) },
5554     { simde_x_mm_set_epu16(10154, 39850, 18306, 55081, 15606, 51707, 30878, 20967),
5555       simde_x_mm_set_epu16(43083, 50945, 49120, 63736, 15921, 64165, 33035, 50764),
5556       simde_x_mm_set_epu16( 6675, 30977, 13720, 53568,  3791, 50625, 15564, 16240) },
5557     { simde_x_mm_set_epu16(12757,  5042, 57712, 50374, 33497, 44643,  9249, 27444),
5558       simde_x_mm_set_epu16( 5516, 28001, 37996, 50447,  2209, 25118, 63921,  7578),
5559       simde_x_mm_set_epu16( 1073,  2154, 33459, 38775,  1129, 17110,  9021,  3173) },
5560     { simde_x_mm_set_epu16( 6520, 35794, 15094, 63136, 22779, 57672,  2423,  7676),
5561       simde_x_mm_set_epu16(20640, 11808, 58236, 53501, 38005, 59820,  7041, 59845),
5562       simde_x_mm_set_epu16( 2053,  6449, 13412, 51541, 13209, 52641,   260,  7009) },
5563     { simde_x_mm_set_epu16(60138,  6017, 21659, 30716, 29807, 17606, 41408, 64807),
5564       simde_x_mm_set_epu16(25712,  8473, 49119, 61515, 61789, 54600, 37356, 34280),
5565       simde_x_mm_set_epu16(23594,   777, 16233, 28831, 28102, 14668, 23602, 33898) }
5566   };
5567 
5568   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5569     simde__m128i r = simde_mm_mulhi_epu16(test_vec[i].a, test_vec[i].b);
5570     simde_assert_m128i_u16(r, ==, test_vec[i].r);
5571   }
5572 
5573   return 0;
5574 }
5575 
5576 static int
5577 test_simde_mm_mullo_epi16(SIMDE_MUNIT_TEST_ARGS) {
5578   const struct {
5579     simde__m128i a;
5580     simde__m128i b;
5581     simde__m128i r;
5582   } test_vec[8] = {
5583     { simde_mm_set_epi16(INT16_C( -7862), INT16_C( 26852), INT16_C( 10752), INT16_C( -9273),
5584                          INT16_C( -9160), INT16_C( -7080), INT16_C(-16165), INT16_C( -8327)),
5585       simde_mm_set_epi16(INT16_C(-20410), INT16_C( 24193), INT16_C(-22278), INT16_C(  -948),
5586                          INT16_C(-31925), INT16_C( -8469), INT16_C(  5801), INT16_C( 10383)),
5587       simde_mm_set_epi16(INT16_C( 31292), INT16_C(-27932), INT16_C(  1024), INT16_C(  8980),
5588                          INT16_C( 11368), INT16_C( -4920), INT16_C(  8851), INT16_C(-17257)) },
5589     { simde_mm_set_epi16(INT16_C( 10435), INT16_C( 19268), INT16_C( 27420), INT16_C(  9542),
5590                          INT16_C(-22355), INT16_C( 22255), INT16_C(-32016), INT16_C( 23304)),
5591       simde_mm_set_epi16(INT16_C( -3883), INT16_C( 14714), INT16_C(-16367), INT16_C(  4175),
5592                          INT16_C( 13386), INT16_C( 20048), INT16_C(-30329), INT16_C(-26826)),
5593       simde_mm_set_epi16(INT16_C(-17857), INT16_C(   616), INT16_C(  7388), INT16_C( -8038),
5594                          INT16_C( -6654), INT16_C(  -848), INT16_C( 31888), INT16_C( -5200)) },
5595     { simde_mm_set_epi16(INT16_C( 16747), INT16_C(-31494), INT16_C( -6008), INT16_C(   256),
5596                          INT16_C( 13584), INT16_C( -2628), INT16_C( 32210), INT16_C(-21204)),
5597       simde_mm_set_epi16(INT16_C(  5844), INT16_C(-28058), INT16_C( -1961), INT16_C( -4057),
5598                          INT16_C(-28767), INT16_C(-15421), INT16_C(-28399), INT16_C(  6019)),
5599       simde_mm_set_epi16(INT16_C( 24220), INT16_C(-28772), INT16_C(-14792), INT16_C(  9984),
5600                          INT16_C( 20240), INT16_C( 25140), INT16_C( 19698), INT16_C(-28284)) },
5601     { simde_mm_set_epi16(INT16_C( -6420), INT16_C( -8597), INT16_C( -3796), INT16_C( 23244),
5602                          INT16_C(-31410), INT16_C(  -804), INT16_C( 31623), INT16_C(  -736)),
5603       simde_mm_set_epi16(INT16_C( -5973), INT16_C( -2870), INT16_C( -5873), INT16_C( -1641),
5604                          INT16_C( -1760), INT16_C( 10653), INT16_C(-28567), INT16_C( 14335)),
5605       simde_mm_set_epi16(INT16_C(  8100), INT16_C( 31854), INT16_C( 11668), INT16_C( -1452),
5606                          INT16_C(-30784), INT16_C( 20204), INT16_C(-26017), INT16_C(   736)) },
5607     { simde_mm_set_epi16(INT16_C(-30942), INT16_C( 23208), INT16_C(  -332), INT16_C(-26357),
5608                          INT16_C( -4575), INT16_C( 25713), INT16_C(-11436), INT16_C(-20469)),
5609       simde_mm_set_epi16(INT16_C( 10752), INT16_C( -6855), INT16_C(-32031), INT16_C( 11523),
5610                          INT16_C(   341), INT16_C( 13013), INT16_C( 12462), INT16_C(-19043)),
5611       simde_mm_set_epi16(INT16_C(-27648), INT16_C( 30568), INT16_C( 17460), INT16_C(-17887),
5612                          INT16_C( 12789), INT16_C(-23547), INT16_C( 25368), INT16_C(-16961)) },
5613     { simde_mm_set_epi16(INT16_C( -9419), INT16_C(-28719), INT16_C( 16604), INT16_C( 20761),
5614                          INT16_C(  7656), INT16_C( 31821), INT16_C( 14202), INT16_C(-12774)),
5615       simde_mm_set_epi16(INT16_C(-24440), INT16_C( -4751), INT16_C(-13213), INT16_C( 10351),
5616                          INT16_C( 25105), INT16_C( -3784), INT16_C(  2889), INT16_C( 15532)),
5617       simde_mm_set_epi16(INT16_C(-27608), INT16_C( -1983), INT16_C( 25876), INT16_C(  4567),
5618                          INT16_C(-13208), INT16_C(-21032), INT16_C(  4042), INT16_C(-28296)) },
5619     { simde_mm_set_epi16(INT16_C( 24274), INT16_C( 31467), INT16_C( 17654), INT16_C(-30184),
5620                          INT16_C( -7163), INT16_C( 32482), INT16_C( 19535), INT16_C(-21227)),
5621       simde_mm_set_epi16(INT16_C( 18405), INT16_C(-30234), INT16_C(  7564), INT16_C(-18060),
5622                          INT16_C( 16638), INT16_C(-17950), INT16_C(  -411), INT16_C(-23904)),
5623       simde_mm_set_epi16(INT16_C(  4058), INT16_C( 12834), INT16_C(-27512), INT16_C( -5408),
5624                          INT16_C( 31990), INT16_C( 21892), INT16_C( 32043), INT16_C( 30496)) },
5625     { simde_mm_set_epi16(INT16_C(-10768), INT16_C(-21062), INT16_C( 22181), INT16_C( 31606),
5626                          INT16_C( 16135), INT16_C(-14823), INT16_C(-19116), INT16_C(-13035)),
5627       simde_mm_set_epi16(INT16_C( 25288), INT16_C(-13107), INT16_C(-24173), INT16_C(-10010),
5628                          INT16_C(-10251), INT16_C(  9523), INT16_C( 29977), INT16_C(-13646)),
5629       simde_mm_set_epi16(INT16_C(   896), INT16_C( 22002), INT16_C(-31297), INT16_C( 31748),
5630                          INT16_C( 12979), INT16_C(  5115), INT16_C(  6452), INT16_C( 10906)) }
5631   };
5632 
5633   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5634     simde__m128i r = simde_mm_mullo_epi16(test_vec[i].a, test_vec[i].b);
5635     simde_assert_m128i_i16(r, ==, test_vec[i].r);
5636   }
5637 
5638   return 0;
5639 }
5640 
5641 static int
5642 test_simde_mm_or_pd(SIMDE_MUNIT_TEST_ARGS) {
5643   const struct {
5644     simde__m128d a;
5645     simde__m128d b;
5646     simde__m128d r;
5647   } test_vec[8] = {
5648     { simde_mm_set_pd(SIMDE_FLOAT64_C(    724.92), SIMDE_FLOAT64_C(    616.22)),
5649       simde_mm_set_pd(SIMDE_FLOAT64_C(    797.85), SIMDE_FLOAT64_C(    484.18)),
5650       simde_mm_set_pd(SIMDE_FLOAT64_C(    989.98), SIMDE_FLOAT64_C( 128062.24)) },
5651     { simde_mm_set_pd(SIMDE_FLOAT64_C(    482.51), SIMDE_FLOAT64_C(    841.87)),
5652       simde_mm_set_pd(SIMDE_FLOAT64_C(   -558.83), SIMDE_FLOAT64_C(    997.07)),
5653       simde_mm_set_pd(SIMDE_FLOAT64_C(-129002.75), SIMDE_FLOAT64_C(   1005.87)) },
5654     { simde_mm_set_pd(SIMDE_FLOAT64_C(    741.60), SIMDE_FLOAT64_C(   -412.08)),
5655       simde_mm_set_pd(SIMDE_FLOAT64_C(   -337.67), SIMDE_FLOAT64_C(   -516.98)),
5656       simde_mm_set_pd(SIMDE_FLOAT64_C( -95215.80), SIMDE_FLOAT64_C(-106109.48)) },
5657     { simde_mm_set_pd(SIMDE_FLOAT64_C(    724.85), SIMDE_FLOAT64_C(   -403.04)),
5658       simde_mm_set_pd(SIMDE_FLOAT64_C(   -503.03), SIMDE_FLOAT64_C(   -699.51)),
5659       simde_mm_set_pd(SIMDE_FLOAT64_C(-130927.93), SIMDE_FLOAT64_C(-122827.50)) },
5660     { simde_mm_set_pd(SIMDE_FLOAT64_C(    231.42), SIMDE_FLOAT64_C(    688.03)),
5661       simde_mm_set_pd(SIMDE_FLOAT64_C(   -373.50), SIMDE_FLOAT64_C(    983.44)),
5662       simde_mm_set_pd(SIMDE_FLOAT64_C(   -511.84), SIMDE_FLOAT64_C(   1015.47)) },
5663     { simde_mm_set_pd(SIMDE_FLOAT64_C(    625.94), SIMDE_FLOAT64_C(   -703.47)),
5664       simde_mm_set_pd(SIMDE_FLOAT64_C(   -942.06), SIMDE_FLOAT64_C(    249.38)),
5665       simde_mm_set_pd(SIMDE_FLOAT64_C(  -1024.00), SIMDE_FLOAT64_C( -65535.34)) },
5666     { simde_mm_set_pd(SIMDE_FLOAT64_C(    -97.92), SIMDE_FLOAT64_C(    -70.84)),
5667       simde_mm_set_pd(SIMDE_FLOAT64_C(   -510.77), SIMDE_FLOAT64_C(   -381.02)),
5668       simde_mm_set_pd(SIMDE_FLOAT64_C(   -511.93), SIMDE_FLOAT64_C(   -383.36)) },
5669     { simde_mm_set_pd(SIMDE_FLOAT64_C(   -350.87), SIMDE_FLOAT64_C(   -439.10)),
5670       simde_mm_set_pd(SIMDE_FLOAT64_C(     66.40), SIMDE_FLOAT64_C(    195.88)),
5671       simde_mm_set_pd(SIMDE_FLOAT64_C(   -351.87), SIMDE_FLOAT64_C(   -439.86)) }
5672   };
5673 
5674   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5675     simde__m128d r = simde_mm_or_pd(test_vec[i].a, test_vec[i].b);
5676     simde_assert_m128d_close(r, test_vec[i].r, 1);
5677   }
5678 
5679   return 0;
5680 }
5681 
5682 static int
5683 test_simde_mm_or_si128(SIMDE_MUNIT_TEST_ARGS) {
5684   const struct {
5685     simde__m128i a;
5686     simde__m128i b;
5687     simde__m128i r;
5688   } test_vec[8] = {
5689     { simde_mm_set_epi64x(INT64_C(   3806780817851842454), INT64_C(   3002076500639794819)),
5690       simde_mm_set_epi64x(INT64_C(  -1576369425501019200), INT64_C(   5863973371898850910)),
5691       simde_mm_set_epi64x(INT64_C(    -81065909581643818), INT64_C(   8784698508288454367)) },
5692     { simde_mm_set_epi64x(INT64_C(   4358272343769327172), INT64_C(  -4254544166297055533)),
5693       simde_mm_set_epi64x(INT64_C(  -3870591542062132163), INT64_C(   8365983368440196218)),
5694       simde_mm_set_epi64x(INT64_C(   -109223286268234115), INT64_C(   -793210092996038917)) },
5695     { simde_mm_set_epi64x(INT64_C(  -8935978336450140157), INT64_C(  -4292132981830530492)),
5696       simde_mm_set_epi64x(INT64_C(  -5275996428160709349), INT64_C(   1809702168782653061)),
5697       simde_mm_set_epi64x(INT64_C(  -5188173984729010917), INT64_C(  -2486163139644895547)) },
5698     { simde_mm_set_epi64x(INT64_C(  -3617483608260678394), INT64_C(  -7299761588855953181)),
5699       simde_mm_set_epi64x(INT64_C(  -3679366837934484296), INT64_C(   5342128716508209170)),
5700       simde_mm_set_epi64x(INT64_C(  -3603900203459740226), INT64_C(  -2687789418219853581)) },
5701     { simde_mm_set_epi64x(INT64_C(   8613776548693408177), INT64_C(  -1221094295236221778)),
5702       simde_mm_set_epi64x(INT64_C(   8491744443283364215), INT64_C(   4783609441494973751)),
5703       simde_mm_set_epi64x(INT64_C(   8636493096189557239), INT64_C(  -1193507273608823361)) },
5704     { simde_mm_set_epi64x(INT64_C(   2256952633337952767), INT64_C(  -5574602856706714295)),
5705       simde_mm_set_epi64x(INT64_C(  -7711313128986328449), INT64_C(  -5631421726257218112)),
5706       simde_mm_set_epi64x(INT64_C(  -6918672559143650305), INT64_C(  -5477736148453327415)) },
5707     { simde_mm_set_epi64x(INT64_C(   6915809581026069253), INT64_C(  -4447049561909832301)),
5708       simde_mm_set_epi64x(INT64_C(   7606865206928880870), INT64_C(    526097040835303983)),
5709       simde_mm_set_epi64x(INT64_C(   9221656517182193639), INT64_C(  -4085336622434885697)) },
5710     { simde_mm_set_epi64x(INT64_C(   6255716227368614659), INT64_C(   3842255123517004943)),
5711       simde_mm_set_epi64x(INT64_C(  -2544504471973996098), INT64_C(   7324902981920444710)),
5712       simde_mm_set_epi64x(INT64_C(  -2382159098826458177), INT64_C(   8500384867471056303)) }
5713   };
5714 
5715   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5716     simde__m128i r = simde_mm_or_si128(test_vec[i].a, test_vec[i].b);
5717     simde_assert_m128i_i8(r, ==, test_vec[i].r);
5718   }
5719 
5720   return 0;
5721 }
5722 
5723 static int
5724 test_simde_mm_packs_epi16(SIMDE_MUNIT_TEST_ARGS) {
5725   const struct {
5726     simde__m128i a;
5727     simde__m128i b;
5728     simde__m128i r;
5729   } test_vec[8] = {
5730     { simde_mm_set_epi16(INT16_C(-22268), INT16_C(   -16), INT16_C(   -49), INT16_C(     8),
5731                          INT16_C( 20029), INT16_C(-30901), INT16_C(-17364), INT16_C(   -65)),
5732       simde_mm_set_epi16(INT16_C(-20429), INT16_C(  4131), INT16_C(-19140), INT16_C( 23907),
5733                          INT16_C(   -87), INT16_C(-15818), INT16_C(   -93), INT16_C(   -34)),
5734       simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( -87), INT8_C(-128), INT8_C( -93), INT8_C( -34),
5735                         INT8_C(-128), INT8_C( -16), INT8_C( -49), INT8_C(   8), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( -65)) },
5736     { simde_mm_set_epi16(INT16_C( -1320), INT16_C(    64), INT16_C(  7903), INT16_C(   -86),
5737                          INT16_C( 17775), INT16_C(   -29), INT16_C(-24347), INT16_C( 20534)),
5738       simde_mm_set_epi16(INT16_C(   -26), INT16_C( 32460), INT16_C(   -35), INT16_C(     9),
5739                          INT16_C(    97), INT16_C(-16116), INT16_C( 21908), INT16_C( 31051)),
5740       simde_mm_set_epi8(INT8_C( -26), INT8_C( 127), INT8_C( -35), INT8_C(   9), INT8_C(  97), INT8_C(-128), INT8_C( 127), INT8_C( 127),
5741                         INT8_C(-128), INT8_C(  64), INT8_C( 127), INT8_C( -86), INT8_C( 127), INT8_C( -29), INT8_C(-128), INT8_C( 127)) },
5742     { simde_mm_set_epi16(INT16_C(    16), INT16_C(-23521), INT16_C(   107), INT16_C( 10693),
5743                          INT16_C(    37), INT16_C( 32277), INT16_C(  -120), INT16_C(   -13)),
5744       simde_mm_set_epi16(INT16_C(  7912), INT16_C(   127), INT16_C(-27046), INT16_C(  -104),
5745                          INT16_C(   114), INT16_C(   -54), INT16_C(   -26), INT16_C( 29057)),
5746       simde_mm_set_epi8(INT8_C( 127), INT8_C( 127), INT8_C(-128), INT8_C(-104), INT8_C( 114), INT8_C( -54), INT8_C( -26), INT8_C( 127),
5747                         INT8_C(  16), INT8_C(-128), INT8_C( 107), INT8_C( 127), INT8_C(  37), INT8_C( 127), INT8_C(-120), INT8_C( -13)) },
5748     { simde_mm_set_epi16(INT16_C(     8), INT16_C(   -84), INT16_C(    26), INT16_C( -1727),
5749                          INT16_C(    53), INT16_C( 29056), INT16_C( -7932), INT16_C(    40)),
5750       simde_mm_set_epi16(INT16_C(-25560), INT16_C(    94), INT16_C( 19164), INT16_C(  -119),
5751                          INT16_C(-25450), INT16_C( 26043), INT16_C( -9549), INT16_C(   110)),
5752       simde_mm_set_epi8(INT8_C(-128), INT8_C(  94), INT8_C( 127), INT8_C(-119), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C( 110),
5753                         INT8_C(   8), INT8_C( -84), INT8_C(  26), INT8_C(-128), INT8_C(  53), INT8_C( 127), INT8_C(-128), INT8_C(  40)) },
5754     { simde_mm_set_epi16(INT16_C( 17087), INT16_C(     3), INT16_C( 26871), INT16_C(   126),
5755                          INT16_C(-10072), INT16_C(    95), INT16_C(   117), INT16_C(   110)),
5756       simde_mm_set_epi16(INT16_C(  7667), INT16_C( -3918), INT16_C(   -98), INT16_C(   -77),
5757                          INT16_C( 29383), INT16_C(-21060), INT16_C(-18775), INT16_C( 21121)),
5758       simde_mm_set_epi8(INT8_C( 127), INT8_C(-128), INT8_C( -98), INT8_C( -77), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 127),
5759                         INT8_C( 127), INT8_C(   3), INT8_C( 127), INT8_C( 126), INT8_C(-128), INT8_C(  95), INT8_C( 117), INT8_C( 110)) },
5760     { simde_mm_set_epi16(INT16_C(  -120), INT16_C(-29564), INT16_C(  -120), INT16_C(   -79),
5761                          INT16_C(   -93), INT16_C(-23649), INT16_C( 25423), INT16_C(-23661)),
5762       simde_mm_set_epi16(INT16_C(   109), INT16_C(-30808), INT16_C(    45), INT16_C(   -18),
5763                          INT16_C( -4268), INT16_C( 30580), INT16_C(    77), INT16_C( -1896)),
5764       simde_mm_set_epi8(INT8_C( 109), INT8_C(-128), INT8_C(  45), INT8_C( -18), INT8_C(-128), INT8_C( 127), INT8_C(  77), INT8_C(-128),
5765                         INT8_C(-120), INT8_C(-128), INT8_C(-120), INT8_C( -79), INT8_C( -93), INT8_C(-128), INT8_C( 127), INT8_C(-128)) },
5766     { simde_mm_set_epi16(INT16_C(    75), INT16_C(   -80), INT16_C(    -5), INT16_C(   -23),
5767                          INT16_C( -9879), INT16_C(   116), INT16_C(-20199), INT16_C(  5095)),
5768       simde_mm_set_epi16(INT16_C( -3339), INT16_C(   -48), INT16_C(  -117), INT16_C( -2107),
5769                          INT16_C( 11715), INT16_C(-13793), INT16_C(-31434), INT16_C( 25021)),
5770       simde_mm_set_epi8(INT8_C(-128), INT8_C( -48), INT8_C(-117), INT8_C(-128), INT8_C( 127), INT8_C(-128), INT8_C(-128), INT8_C( 127),
5771                         INT8_C(  75), INT8_C( -80), INT8_C(  -5), INT8_C( -23), INT8_C(-128), INT8_C( 116), INT8_C(-128), INT8_C( 127)) },
5772     { simde_mm_set_epi16(INT16_C(   -15), INT16_C(   110), INT16_C(  -521), INT16_C(    75),
5773                          INT16_C( 12019), INT16_C(-30116), INT16_C( 17702), INT16_C( 14401)),
5774       simde_mm_set_epi16(INT16_C(-15008), INT16_C(   -80), INT16_C(  -127), INT16_C(-29333),
5775                          INT16_C(    -7), INT16_C(-17846), INT16_C(    83), INT16_C( 25637)),
5776       simde_mm_set_epi8(INT8_C(-128), INT8_C( -80), INT8_C(-127), INT8_C(-128), INT8_C(  -7), INT8_C(-128), INT8_C(  83), INT8_C( 127),
5777                         INT8_C( -15), INT8_C( 110), INT8_C(-128), INT8_C(  75), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C( 127)) }
5778   };
5779 
5780   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5781     simde__m128i r = simde_mm_packs_epi16(test_vec[i].a, test_vec[i].b);
5782     simde_assert_m128i_i8(r, ==, test_vec[i].r);
5783   }
5784 
5785   return 0;
5786 }
5787 
5788 static int
5789 test_simde_mm_packs_epi32(SIMDE_MUNIT_TEST_ARGS) {
5790   const struct {
5791     simde__m128i a;
5792     simde__m128i b;
5793     simde__m128i r;
5794   } test_vec[8] = {
5795     { simde_mm_set_epi32(INT32_C( 1221393622), INT32_C(    1245122), INT32_C( -546439182), INT32_C( 1653967185)),
5796       simde_mm_set_epi32(INT32_C(   -5570627), INT32_C( 1604714526), INT32_C(    3276815), INT32_C( -865960168)),
5797       simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768),
5798                          INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) },
5799     { simde_mm_set_epi32(INT32_C(-1556742099), INT32_C(    3735602), INT32_C( -795913538), INT32_C(    5177351)),
5800       simde_mm_set_epi32(INT32_C(  230555532), INT32_C( -681902099), INT32_C(-1460947394), INT32_C( 1435959285)),
5801       simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767),
5802                          INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) },
5803     { simde_mm_set_epi32(INT32_C( 1058013130), INT32_C( 1801350196), INT32_C(    3735625), INT32_C(     393200)),
5804       simde_mm_set_epi32(INT32_C(   -5046245), INT32_C( 1947557327), INT32_C( -390520293), INT32_C(-1060577736)),
5805       simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768),
5806                          INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767)) },
5807     { simde_mm_set_epi32(INT32_C( 1625994666), INT32_C(  151157112), INT32_C(   -6356918), INT32_C(  574958135)),
5808       simde_mm_set_epi32(INT32_C( -878149423), INT32_C(   -1310820), INT32_C(    7694016), INT32_C(    1656093)),
5809       simde_mm_set_epi16(INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C( 32767),
5810                          INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C( 32767)) },
5811     { simde_mm_set_epi32(INT32_C(  906756004), INT32_C(  589883340), INT32_C(-1375993871), INT32_C(   -5221415)),
5812       simde_mm_set_epi32(INT32_C(-1492628097), INT32_C(   -7536518), INT32_C(    1834989), INT32_C(-2090880115)),
5813       simde_mm_set_epi16(INT16_C(-32768), INT16_C(-32768), INT16_C( 32767), INT16_C(-32768),
5814                          INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768)) },
5815     { simde_mm_set_epi32(INT32_C( 1759910713), INT32_C( 2028743221), INT32_C( 1203039561), INT32_C(    3735524)),
5816       simde_mm_set_epi32(INT32_C(    5505016), INT32_C(   68681650), INT32_C(    3895727), INT32_C(-1084227687)),
5817       simde_mm_set_epi16(INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C(-32768),
5818                          INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767)) },
5819     { simde_mm_set_epi32(INT32_C( 1249181759), INT32_C(  850460644), INT32_C(  643956807), INT32_C( 1402185830)),
5820       simde_mm_set_epi32(INT32_C(  503821785), INT32_C(   -1966044), INT32_C(   -1228291), INT32_C(    6420027)),
5821       simde_mm_set_epi16(INT16_C( 32767), INT16_C(-32768), INT16_C(-32768), INT16_C( 32767),
5822                          INT16_C( 32767), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767)) },
5823     { simde_mm_set_epi32(INT32_C( 1562990695), INT32_C(  134021098), INT32_C(-1071906850), INT32_C( -558152330)),
5824       simde_mm_set_epi32(INT32_C(-1746927677), INT32_C(    7209004), INT32_C(     917512), INT32_C(    4155429)),
5825       simde_mm_set_epi16(INT16_C(-32768), INT16_C( 32767), INT16_C( 32767), INT16_C( 32767),
5826                          INT16_C( 32767), INT16_C( 32767), INT16_C(-32768), INT16_C(-32768)) }
5827   };
5828 
5829   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5830     simde__m128i r = simde_mm_packs_epi32(test_vec[i].a, test_vec[i].b);
5831     simde_assert_m128i_i16(r, ==, test_vec[i].r);
5832   }
5833 
5834   return 0;
5835 }
5836 
5837 static int
5838 test_simde_mm_packus_epi16(SIMDE_MUNIT_TEST_ARGS) {
5839   const struct {
5840     simde__m128i a;
5841     simde__m128i b;
5842     simde__m128i r;
5843   } test_vec[8] = {
5844     { simde_mm_set_epi16(INT16_C(   217), INT16_C(-10893), INT16_C( 10007), INT16_C(-11974),
5845                          INT16_C(   134), INT16_C(    45), INT16_C(    21), INT16_C(   179)),
5846       simde_mm_set_epi16(INT16_C( 14829), INT16_C(    37), INT16_C(  2757), INT16_C(-26385),
5847                          INT16_C(    26), INT16_C(   196), INT16_C(  2768), INT16_C(   221)),
5848       simde_x_mm_set_epu8(255,  37, 255,   0,  26, 196, 255, 221,
5849                           217,   0, 255,   0, 134,  45,  21, 179) },
5850     { simde_mm_set_epi16(INT16_C(    84), INT16_C( 11197), INT16_C(    28), INT16_C(-18960),
5851                          INT16_C(     0), INT16_C(   243), INT16_C(   209), INT16_C(   115)),
5852       simde_mm_set_epi16(INT16_C( 26800), INT16_C(    44), INT16_C(   244), INT16_C(   114),
5853                          INT16_C(   234), INT16_C(  7269), INT16_C(  2441), INT16_C( -9419)),
5854       simde_x_mm_set_epu8(255,  44, 244, 114, 234, 255, 255,   0,
5855                            84, 255,  28,   0,   0, 243, 209, 115) },
5856     { simde_mm_set_epi16(INT16_C( 26559), INT16_C(-13811), INT16_C(   141), INT16_C(   130),
5857                          INT16_C(-24149), INT16_C(   185), INT16_C(  9120), INT16_C(-14604)),
5858       simde_mm_set_epi16(INT16_C(   190), INT16_C(   162), INT16_C(  3761), INT16_C(-10696),
5859                          INT16_C( 15175), INT16_C(  6926), INT16_C( 19649), INT16_C(    79)),
5860       simde_x_mm_set_epu8(190, 162, 255,   0, 255, 255, 255,  79,
5861                           255,   0, 141, 130,   0, 185, 255,   0) },
5862     { simde_mm_set_epi16(INT16_C(   250), INT16_C(-23643), INT16_C(-15994), INT16_C(   173),
5863                          INT16_C(    97), INT16_C(   158), INT16_C(    82), INT16_C(   231)),
5864       simde_mm_set_epi16(INT16_C(    70), INT16_C( 30022), INT16_C(     0), INT16_C( -8717),
5865                          INT16_C(     6), INT16_C(   206), INT16_C(-25401), INT16_C(   252)),
5866       simde_x_mm_set_epu8( 70, 255,   0,   0,   6, 206,   0, 252,
5867                           250,   0,   0, 173,  97, 158,  82, 231) },
5868     { simde_mm_set_epi16(INT16_C(    92), INT16_C(-13839), INT16_C(   243), INT16_C( -3624),
5869                          INT16_C(   252), INT16_C(-29405), INT16_C(     3), INT16_C(  6730)),
5870       simde_mm_set_epi16(INT16_C(  4496), INT16_C( 19200), INT16_C(    70), INT16_C(   128),
5871                          INT16_C(  2496), INT16_C(    60), INT16_C( 18531), INT16_C(-20006)),
5872       simde_x_mm_set_epu8(255, 255,  70, 128, 255,  60, 255,   0,
5873                            92,   0, 243,   0, 252,   0,   3, 255) },
5874     { simde_mm_set_epi16(INT16_C(    57), INT16_C(-14586), INT16_C( 21134), INT16_C(  7065),
5875                          INT16_C(     3), INT16_C(-16049), INT16_C( 26223), INT16_C(-20721)),
5876       simde_mm_set_epi16(INT16_C(   129), INT16_C(   105), INT16_C( -1899), INT16_C(   221),
5877                          INT16_C(-24446), INT16_C(-20297), INT16_C( 30906), INT16_C(   192)),
5878       simde_x_mm_set_epu8(129, 105,   0, 221,   0,   0, 255, 192,
5879                            57,   0, 255, 255,   3,   0, 255,   0) },
5880     { simde_mm_set_epi16(INT16_C(   128), INT16_C( 22639), INT16_C( -9670), INT16_C(  8168),
5881                          INT16_C( -1055), INT16_C(-24505), INT16_C( 32719), INT16_C( 16999)),
5882       simde_mm_set_epi16(INT16_C(-23185), INT16_C(   119), INT16_C(   108), INT16_C(    34),
5883                          INT16_C(-15892), INT16_C(  2641), INT16_C(   242), INT16_C( -7325)),
5884       simde_x_mm_set_epu8(  0, 119, 108,  34,   0, 255, 242,   0,
5885                           128, 255,   0, 255,   0,   0, 255, 255) },
5886     { simde_mm_set_epi16(INT16_C(    95), INT16_C(   145), INT16_C(   101), INT16_C(  5449),
5887                          INT16_C(   163), INT16_C( 19185), INT16_C(  3025), INT16_C(    52)),
5888       simde_mm_set_epi16(INT16_C(  2870), INT16_C(   140), INT16_C(   144), INT16_C(   254),
5889                          INT16_C(  8482), INT16_C(  4388), INT16_C(   201), INT16_C(-14867)),
5890       simde_x_mm_set_epu8(255, 140, 144, 254, 255, 255, 201,   0,
5891                            95, 145, 101, 255, 163, 255, 255,  52) }
5892   };
5893 
5894   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5895     simde__m128i r = simde_mm_packus_epi16(test_vec[i].a, test_vec[i].b);
5896     simde_assert_m128i_i8(r, ==, test_vec[i].r);
5897   }
5898 
5899   return 0;
5900 }
5901 
5902 static int
5903 test_simde_mm_sad_epu8(SIMDE_MUNIT_TEST_ARGS) {
5904   const struct {
5905     simde__m128i a;
5906     simde__m128i b;
5907     simde__m128i r;
5908   } test_vec[8] = {
5909     { simde_x_mm_set_epu8(215,  90,  59, 114, 199, 190,   5,  88,
5910                           189, 152,  95,  90,  71,  40,  85,  39),
5911       simde_x_mm_set_epu8( 54, 166, 154, 195, 131,  97, 225, 141,
5912                           107,   5,  50,  55, 194,  31, 223,  92),
5913       simde_mm_set_epi64x(INT64_C( 847), INT64_C( 632)) },
5914     { simde_x_mm_set_epu8( 73,   8,  35,  63,   9, 118, 137, 154,
5915                           163,  61,   8,   4,  96,  39, 181,  31),
5916       simde_x_mm_set_epu8(244,  64,  21,   0,  73,  79,  47, 148,
5917                           227,   0, 217, 151, 241, 123, 179, 200),
5918       simde_mm_set_epi64x(INT64_C( 503), INT64_C( 881)) },
5919     { simde_x_mm_set_epu8(188, 156, 164, 209,  37, 165, 186, 237,
5920                           157,  45, 141,   9, 227,   9,   6, 113),
5921       simde_x_mm_set_epu8(196,  12, 188, 136, 227,  14, 111, 188,
5922                            42, 252, 141, 251,  41,  42,  48,  10),
5923       simde_mm_set_epi64x(INT64_C( 714), INT64_C( 928)) },
5924     { simde_x_mm_set_epu8(221, 210, 203,  74, 151,  53, 237,  96,
5925                           105,  62,  32, 146, 208,  27, 214,  15),
5926       simde_x_mm_set_epu8(106, 143, 238,  35, 165, 158,  48,  47,
5927                            51, 172,  84,  44, 119, 233,  73,  10),
5928       simde_mm_set_epi64x(INT64_C( 613), INT64_C( 759)) },
5929     { simde_x_mm_set_epu8(158, 146, 218,  39,  84, 176,  15, 200,
5930                           114, 100, 110,  72,  37, 118, 124,  52),
5931       simde_x_mm_set_epu8(250, 173, 237, 165,  77, 193,  83,  68,
5932                           159, 214,  52, 182, 160, 117, 236, 237),
5933       simde_mm_set_epi64x(INT64_C( 488), INT64_C( 748)) },
5934     { simde_x_mm_set_epu8(116,  17,  11, 212,  41, 247, 182,  55,
5935                           218, 151,  38, 248,  87,   3, 108,   3),
5936       simde_x_mm_set_epu8(178, 255,   4, 183,  81, 104,  79, 156,
5937                           178, 174,  55, 110, 255,  70, 179, 129),
5938       simde_mm_set_epi64x(INT64_C( 723), INT64_C( 650)) },
5939     { simde_x_mm_set_epu8( 26, 112, 229,  82, 174, 243,  79,  54,
5940                           103,  25, 150, 156, 120,  47,  29, 212),
5941       simde_x_mm_set_epu8(155, 158, 100, 233, 190, 145,   4, 176,
5942                           236,  88,  45,  24, 159, 182,  83, 215),
5943       simde_mm_set_epi64x(INT64_C( 766), INT64_C( 664)) },
5944     { simde_x_mm_set_epu8( 29, 127,  97,  34, 247,  17,  64,  73,
5945                           255,  69, 189, 150, 155,  84, 174,  88),
5946       simde_x_mm_set_epu8(224, 212,  67, 184, 190,  48, 118, 149,
5947                           209, 255,   9, 200, 126, 242, 201,  30),
5948       simde_mm_set_epi64x(INT64_C( 678), INT64_C( 734)) }
5949   };
5950 
5951   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
5952     simde__m128i r = simde_mm_sad_epu8(test_vec[i].a, test_vec[i].b);
5953     simde_assert_m128i_u8(r, ==, test_vec[i].r);
5954   }
5955 
5956   return 0;
5957 }
5958 
5959 static int
5960 test_simde_mm_set_epi8(SIMDE_MUNIT_TEST_ARGS) {
5961   const struct {
5962     int8_t e15;
5963     int8_t e14;
5964     int8_t e13;
5965     int8_t e12;
5966     int8_t e11;
5967     int8_t e10;
5968     int8_t e9;
5969     int8_t e8;
5970     int8_t e7;
5971     int8_t e6;
5972     int8_t e5;
5973     int8_t e4;
5974     int8_t e3;
5975     int8_t e2;
5976     int8_t e1;
5977     int8_t e0;
5978     simde__m128i r;
5979   } test_vec[8] = {
5980     {  -48,  -30,   88,  -96,    9,   89,   20,  -95,
5981         63,  -76,  126,   67,   85,   88,  -17, -107,
5982       simde_mm_set_epi8(INT8_C( -48), INT8_C( -30), INT8_C(  88), INT8_C( -96),
5983                         INT8_C(   9), INT8_C(  89), INT8_C(  20), INT8_C( -95),
5984                         INT8_C(  63), INT8_C( -76), INT8_C( 126), INT8_C(  67),
5985                         INT8_C(  85), INT8_C(  88), INT8_C( -17), INT8_C(-107)) },
5986     {   73,  -68,  -61,   58,  -37,    5,  -64,  -56,
5987         -5,   33,  -53,  -34,  -11,   57,   49,   12,
5988       simde_mm_set_epi8(INT8_C(  73), INT8_C( -68), INT8_C( -61), INT8_C(  58),
5989                         INT8_C( -37), INT8_C(   5), INT8_C( -64), INT8_C( -56),
5990                         INT8_C(  -5), INT8_C(  33), INT8_C( -53), INT8_C( -34),
5991                         INT8_C( -11), INT8_C(  57), INT8_C(  49), INT8_C(  12)) },
5992     {  -65, -108,   95, -117,   35,   45,   54,  -43,
5993        -45,  123,  113,   -6,   23,  -66,   77,   94,
5994       simde_mm_set_epi8(INT8_C( -65), INT8_C(-108), INT8_C(  95), INT8_C(-117),
5995                         INT8_C(  35), INT8_C(  45), INT8_C(  54), INT8_C( -43),
5996                         INT8_C( -45), INT8_C( 123), INT8_C( 113), INT8_C(  -6),
5997                         INT8_C(  23), INT8_C( -66), INT8_C(  77), INT8_C(  94)) },
5998     {  -72,   95,  112,   68,   56,  -74,  -97,  -55,
5999         22,   53,  -22,   68, -107,   99,   -5,  -94,
6000       simde_mm_set_epi8(INT8_C( -72), INT8_C(  95), INT8_C( 112), INT8_C(  68),
6001                         INT8_C(  56), INT8_C( -74), INT8_C( -97), INT8_C( -55),
6002                         INT8_C(  22), INT8_C(  53), INT8_C( -22), INT8_C(  68),
6003                         INT8_C(-107), INT8_C(  99), INT8_C(  -5), INT8_C( -94)) },
6004     {  -48,    6,  114,   89,  -57, -104,  -78,  -72,
6005        -32,  -41,  -27,  -58,   -1, -100, -126,  -52,
6006       simde_mm_set_epi8(INT8_C( -48), INT8_C(   6), INT8_C( 114), INT8_C(  89),
6007                         INT8_C( -57), INT8_C(-104), INT8_C( -78), INT8_C( -72),
6008                         INT8_C( -32), INT8_C( -41), INT8_C( -27), INT8_C( -58),
6009                         INT8_C(  -1), INT8_C(-100), INT8_C(-126), INT8_C( -52)) },
6010     {   75, -127,  -59,   90,  126,   -9,   88,   22,
6011         36,   75,  -11,  -10,   31,  -72,   19,  -30,
6012       simde_mm_set_epi8(INT8_C(  75), INT8_C(-127), INT8_C( -59), INT8_C(  90),
6013                         INT8_C( 126), INT8_C(  -9), INT8_C(  88), INT8_C(  22),
6014                         INT8_C(  36), INT8_C(  75), INT8_C( -11), INT8_C( -10),
6015                         INT8_C(  31), INT8_C( -72), INT8_C(  19), INT8_C( -30)) },
6016     {  -66,   57,   86,  -24, -102,   97,   37,   79,
6017         98,  -52,   75,  113,  -66,  -45,  -97,   50,
6018       simde_mm_set_epi8(INT8_C( -66), INT8_C(  57), INT8_C(  86), INT8_C( -24),
6019                         INT8_C(-102), INT8_C(  97), INT8_C(  37), INT8_C(  79),
6020                         INT8_C(  98), INT8_C( -52), INT8_C(  75), INT8_C( 113),
6021                         INT8_C( -66), INT8_C( -45), INT8_C( -97), INT8_C(  50)) },
6022     {  -14,  -31,   -3,   35,   62,   73,   10,   46,
6023         72,  110,  -30,   71,  -50,  -46,  106,  -75,
6024       simde_mm_set_epi8(INT8_C( -14), INT8_C( -31), INT8_C(  -3), INT8_C(  35),
6025                         INT8_C(  62), INT8_C(  73), INT8_C(  10), INT8_C(  46),
6026                         INT8_C(  72), INT8_C( 110), INT8_C( -30), INT8_C(  71),
6027                         INT8_C( -50), INT8_C( -46), INT8_C( 106), INT8_C( -75)) },
6028   };
6029 
6030   // printf("\n");
6031   // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
6032   //   int8_t e15 = munit_rand_uint32();
6033   //   int8_t e14 = munit_rand_uint32();
6034   //   int8_t e13 = munit_rand_uint32();
6035   //   int8_t e12 = munit_rand_uint32();
6036   //   int8_t e11 = munit_rand_uint32();
6037   //   int8_t e10 = munit_rand_uint32();
6038   //   int8_t e9 = munit_rand_uint32();
6039   //   int8_t e8 = munit_rand_uint32();
6040   //   int8_t e7 = munit_rand_uint32();
6041   //   int8_t e6 = munit_rand_uint32();
6042   //   int8_t e5 = munit_rand_uint32();
6043   //   int8_t e4 = munit_rand_uint32();
6044   //   int8_t e3 = munit_rand_uint32();
6045   //   int8_t e2 = munit_rand_uint32();
6046   //   int8_t e1 = munit_rand_uint32();
6047   //   int8_t e0 = munit_rand_uint32();
6048   //   simde__m128i_private r;
6049 
6050   //   r = simde__m128i_to_private(simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0));
6051 
6052   //   printf("    { %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ",\n"
6053   //          "      %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ", %4" PRId8 ",\n",
6054   //          e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0);
6055   //   printf("      simde_mm_set_epi8(INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
6056   //          "                        INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
6057   //          "                        INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "),\n"
6058   //          "                        INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 "), INT8_C(%4" PRId8 ")) },\n",
6059   //          r.i8[15], r.i8[14], r.i8[13], r.i8[12], r.i8[11], r.i8[10], r.i8[ 9], r.i8[ 8],
6060   //          r.i8[ 7], r.i8[ 6], r.i8[ 5], r.i8[ 4], r.i8[ 3], r.i8[ 2], r.i8[ 1], r.i8[ 0]);
6061   // }
6062   // return MUNIT_FAIL;
6063 
6064   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6065     simde__m128i r = simde_mm_set_epi8(
6066         test_vec[i].e15, test_vec[i].e14, test_vec[i].e13, test_vec[i].e12,
6067         test_vec[i].e11, test_vec[i].e10, test_vec[i].e9, test_vec[i].e8,
6068         test_vec[i].e7, test_vec[i].e6, test_vec[i].e5, test_vec[i].e4,
6069         test_vec[i].e3, test_vec[i].e2, test_vec[i].e1, test_vec[i].e0);
6070     simde_assert_m128i_i16(r, ==, test_vec[i].r);
6071   }
6072 
6073   return 0;
6074 }
6075 
6076 
6077 static int
6078 test_simde_mm_set_epi16(SIMDE_MUNIT_TEST_ARGS) {
6079   const struct {
6080     int16_t e7;
6081     int16_t e6;
6082     int16_t e5;
6083     int16_t e4;
6084     int16_t e3;
6085     int16_t e2;
6086     int16_t e1;
6087     int16_t e0;
6088     simde__m128i r;
6089   } test_vec[8] = {
6090     { -12714, -18436,  19109,  27542,  -4031,  11847,  32066,   4849,
6091       simde_mm_set_epi16(INT16_C(-12714), INT16_C(-18436), INT16_C( 19109), INT16_C( 27542),
6092                          INT16_C( -4031), INT16_C( 11847), INT16_C( 32066), INT16_C(  4849)) },
6093     {  20812, -18306,  32711,   2248, -22144, -30920,  20888, -23709,
6094       simde_mm_set_epi16(INT16_C( 20812), INT16_C(-18306), INT16_C( 32711), INT16_C(  2248),
6095                          INT16_C(-22144), INT16_C(-30920), INT16_C( 20888), INT16_C(-23709)) },
6096     {   8868, -14625,  -5258, -12928, -11989,  31315,  -9098,  19222,
6097       simde_mm_set_epi16(INT16_C(  8868), INT16_C(-14625), INT16_C( -5258), INT16_C(-12928),
6098                          INT16_C(-11989), INT16_C( 31315), INT16_C( -9098), INT16_C( 19222)) },
6099     {  -5334,  23871,   3901,  14443, -13328,  23359, -24889,  28356,
6100       simde_mm_set_epi16(INT16_C( -5334), INT16_C( 23871), INT16_C(  3901), INT16_C( 14443),
6101                          INT16_C(-13328), INT16_C( 23359), INT16_C(-24889), INT16_C( 28356)) },
6102     {  10774, -19043,  31284,   4044,    862, -11938, -27554, -25119,
6103       simde_mm_set_epi16(INT16_C( 10774), INT16_C(-19043), INT16_C( 31284), INT16_C(  4044),
6104                          INT16_C(   862), INT16_C(-11938), INT16_C(-27554), INT16_C(-25119)) },
6105     {  20150, -31510, -29797,  -3272, -18019,  16111, -15969, -11740,
6106       simde_mm_set_epi16(INT16_C( 20150), INT16_C(-31510), INT16_C(-29797), INT16_C( -3272),
6107                          INT16_C(-18019), INT16_C( 16111), INT16_C(-15969), INT16_C(-11740)) },
6108     {  -3147, -24243, -28710,  -5510, -20724,  13872,  -9632,  -7728,
6109       simde_mm_set_epi16(INT16_C( -3147), INT16_C(-24243), INT16_C(-28710), INT16_C( -5510),
6110                          INT16_C(-20724), INT16_C( 13872), INT16_C( -9632), INT16_C( -7728)) },
6111     {   6318,  11524,  30789,  -2974,   3458, -10908, -25743, -20801,
6112       simde_mm_set_epi16(INT16_C(  6318), INT16_C( 11524), INT16_C( 30789), INT16_C( -2974),
6113                          INT16_C(  3458), INT16_C(-10908), INT16_C(-25743), INT16_C(-20801)) },
6114   };
6115 
6116   // printf("\n");
6117   // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
6118   //   int16_t e7 = munit_rand_uint32();
6119   //   int16_t e6 = munit_rand_uint32();
6120   //   int16_t e5 = munit_rand_uint32();
6121   //   int16_t e4 = munit_rand_uint32();
6122   //   int16_t e3 = munit_rand_uint32();
6123   //   int16_t e2 = munit_rand_uint32();
6124   //   int16_t e1 = munit_rand_uint32();
6125   //   int16_t e0 = munit_rand_uint32();
6126   //   simde__m128i_private r;
6127 
6128   //   r = simde__m128i_to_private(simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0));
6129 
6130   //   printf("    { %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ", %6" PRId16 ",\n",
6131   //          e7, e6, e5, e4, e3, e2, e1, e0);
6132   //   printf("      simde_mm_set_epi16(INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "),\n"
6133 	//    "                         INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 "), INT16_C(%6" PRId16 ")) },\n",
6134   //          r.i16[7], r.i16[6], r.i16[5], r.i16[4], r.i16[3], r.i16[2], r.i16[1], r.i16[0]);
6135   // }
6136   // return MUNIT_FAIL;
6137 
6138   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6139     simde__m128i r = simde_mm_set_epi16(
6140         test_vec[i].e7, test_vec[i].e6, test_vec[i].e5, test_vec[i].e4,
6141         test_vec[i].e3, test_vec[i].e2, test_vec[i].e1, test_vec[i].e0);
6142     simde_assert_m128i_i16(r, ==, test_vec[i].r);
6143   }
6144 
6145   return 0;
6146 }
6147 
6148 static int
6149 test_simde_mm_set_epi32(SIMDE_MUNIT_TEST_ARGS) {
6150   const struct {
6151     int32_t e3;
6152     int32_t e2;
6153     int32_t e1;
6154     int32_t e0;
6155     simde__m128i r;
6156   } test_vec[8] = {
6157     {   391721235,  1158362691,  2131167346, -1830589941,
6158       simde_mm_set_epi32(INT32_C(  391721235), INT32_C( 1158362691), INT32_C( 2131167346), INT32_C(-1830589941)) },
6159     {  2141048609,  2073510589,   924258053,   594030571,
6160       simde_mm_set_epi32(INT32_C( 2141048609), INT32_C( 2073510589), INT32_C(  924258053), INT32_C(  594030571)) },
6161     {  1247989717,   798714331, -1727766974,  1099259705,
6162       simde_mm_set_epi32(INT32_C( 1247989717), INT32_C(  798714331), INT32_C(-1727766974), INT32_C( 1099259705)) },
6163     {  1870669627,  1775697551, -2027090738, -1897466045,
6164       simde_mm_set_epi32(INT32_C( 1870669627), INT32_C( 1775697551), INT32_C(-2027090738), INT32_C(-1897466045)) },
6165     {  -584467290,  2134946541,   565373055,  -212717620,
6166       simde_mm_set_epi32(INT32_C( -584467290), INT32_C( 2134946541), INT32_C(  565373055), INT32_C( -212717620)) },
6167     {  2072276971,  1968759191,  2049222745,    64876297,
6168       simde_mm_set_epi32(INT32_C( 2072276971), INT32_C( 1968759191), INT32_C( 2049222745), INT32_C(   64876297)) },
6169     {  -285499155,  -775226349,  1401270915,  -476575867,
6170       simde_mm_set_epi32(INT32_C( -285499155), INT32_C( -775226349), INT32_C( 1401270915), INT32_C( -476575867)) },
6171     {  -135350759, -1402535212,  -799024597,  1171022108,
6172       simde_mm_set_epi32(INT32_C( -135350759), INT32_C(-1402535212), INT32_C( -799024597), INT32_C( 1171022108)) },
6173 
6174   };
6175 
6176   // printf("\n");
6177   // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
6178   //   int32_t e3 = munit_rand_uint32();
6179   //   int32_t e2 = munit_rand_uint32();
6180   //   int32_t e1 = munit_rand_uint32();
6181   //   int32_t e0 = munit_rand_uint32();
6182   //   simde__m128i_private r;
6183 
6184   //   r = simde__m128i_to_private(simde_mm_set_epi32(e3, e2, e1, e0));
6185 
6186   //   printf("    { %11" PRId32 ", %11" PRId32 ", %11" PRId32 ", %11" PRId32 ",\n",
6187   //       e3, e2, e1, e0);
6188   //   printf("      simde_mm_set_epi32(INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 "), INT32_C(%11" PRId32 ")) },\n",
6189   //          r.i32[3], r.i32[2], r.i32[1], r.i32[0]);
6190   // }
6191   // return MUNIT_FAIL;
6192 
6193   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6194     simde__m128i r = simde_mm_set_epi32(
6195         test_vec[i].e3, test_vec[i].e2, test_vec[i].e1, test_vec[i].e0);
6196     simde_assert_m128i_i32(r, ==, test_vec[i].r);
6197   }
6198 
6199   return 0;
6200 }
6201 
6202 
6203 static int
6204 test_simde_mm_set_epi64(SIMDE_MUNIT_TEST_ARGS) {
6205   const struct {
6206     simde__m64 a;
6207     simde__m64 b;
6208     simde__m128i r;
6209   } test_vec[8] = {
6210     { simde_x_mm_set_pi64(INT64_C( -664890281848034973)),
6211       simde_x_mm_set_pi64(INT64_C(-2789670716680390611)),
6212       simde_mm_set_epi64x(INT64_C( -664890281848034973), INT64_C(-2789670716680390611)) },
6213     { simde_x_mm_set_pi64(INT64_C( 5148232775303872766)),
6214       simde_x_mm_set_pi64(INT64_C(-4313892930136448255)),
6215       simde_mm_set_epi64x(INT64_C( 5148232775303872766), INT64_C(-4313892930136448255)) },
6216     { simde_x_mm_set_pi64(INT64_C(-1888312870737326599)),
6217       simde_x_mm_set_pi64(INT64_C( 5248373813564878857)),
6218       simde_mm_set_epi64x(INT64_C(-1888312870737326599), INT64_C( 5248373813564878857)) },
6219     { simde_x_mm_set_pi64(INT64_C(-1560565807933837504)),
6220       simde_x_mm_set_pi64(INT64_C( 7268621988108136806)),
6221       simde_mm_set_epi64x(INT64_C(-1560565807933837504), INT64_C( 7268621988108136806)) },
6222     { simde_x_mm_set_pi64(INT64_C(-1956110667393926378)),
6223       simde_x_mm_set_pi64(INT64_C(  345154446382384077)),
6224       simde_mm_set_epi64x(INT64_C(-1956110667393926378), INT64_C(  345154446382384077)) },
6225     { simde_x_mm_set_pi64(INT64_C(-8505578167241709019)),
6226       simde_x_mm_set_pi64(INT64_C( 8252355195326597777)),
6227       simde_mm_set_epi64x(INT64_C(-8505578167241709019), INT64_C( 8252355195326597777)) },
6228     { simde_x_mm_set_pi64(INT64_C( 1122841158674863793)),
6229       simde_x_mm_set_pi64(INT64_C(-5697643761898453242)),
6230       simde_mm_set_epi64x(INT64_C( 1122841158674863793), INT64_C(-5697643761898453242)) },
6231     { simde_x_mm_set_pi64(INT64_C(-6130487997584440381)),
6232       simde_x_mm_set_pi64(INT64_C( 8349290391131198480)),
6233       simde_mm_set_epi64x(INT64_C(-6130487997584440381), INT64_C( 8349290391131198480)) }
6234   };
6235 
6236   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6237     simde__m128i r = simde_mm_set_epi64(test_vec[i].a, test_vec[i].b);
6238     simde_assert_m128i_i64(r, ==, test_vec[i].r);
6239   }
6240 
6241   return 0;
6242 }
6243 static int
6244 test_simde_mm_set_epi64x(SIMDE_MUNIT_TEST_ARGS) {
6245   const struct {
6246     int64_t e0;
6247     int64_t e1;
6248     simde__m128i r;
6249   } test_vec[8] = {
6250     {  4539993052502346892,  6550919315486945587,
6251       simde_mm_set_epi64x(INT64_C( 4539993052502346892), INT64_C( 6550919315486945587)) },
6252     { -8973439144672590874,  1846200258209621581,
6253       simde_mm_set_epi64x(INT64_C(-8973439144672590874), INT64_C( 1846200258209621581)) },
6254     {   771735515044186414, -5491872275643679405,
6255       simde_mm_set_epi64x(INT64_C(  771735515044186414), INT64_C(-5491872275643679405)) },
6256     {  3535609691698693035, -2659398015885158473,
6257       simde_mm_set_epi64x(INT64_C( 3535609691698693035), INT64_C(-2659398015885158473)) },
6258     { -5310489553719126486, -1326851720416490864,
6259       simde_mm_set_epi64x(INT64_C(-5310489553719126486), INT64_C(-1326851720416490864)) },
6260     { -1132069192689462333, -3126474808030937011,
6261       simde_mm_set_epi64x(INT64_C(-1132069192689462333), INT64_C(-3126474808030937011)) },
6262     {  3201360662826502659,  2894150994676591563,
6263       simde_mm_set_epi64x(INT64_C( 3201360662826502659), INT64_C( 2894150994676591563)) },
6264     {  5657213110111307867,  4054595932996548594,
6265       simde_mm_set_epi64x(INT64_C( 5657213110111307867), INT64_C( 4054595932996548594)) },
6266 
6267   };
6268 
6269   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6270     simde__m128i r = simde_mm_set_epi64x(test_vec[i].e0, test_vec[i].e1);
6271     simde_assert_m128i_i64(r, ==, test_vec[i].r);
6272   }
6273 
6274   return 0;
6275 }
6276 
6277 static int
6278 test_simde_mm_set1_epi8 (SIMDE_MUNIT_TEST_ARGS) {
6279   static const struct {
6280     const int8_t a;
6281     const int8_t r[16];
6282   } test_vec[] = {
6283     { -INT8_C(  57),
6284       { -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57),
6285         -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57) } },
6286     { -INT8_C(  62),
6287       { -INT8_C(  62), -INT8_C(  62), -INT8_C(  62), -INT8_C(  62), -INT8_C(  62), -INT8_C(  62), -INT8_C(  62), -INT8_C(  62),
6288         -INT8_C(  62), -INT8_C(  62), -INT8_C(  62), -INT8_C(  62), -INT8_C(  62), -INT8_C(  62), -INT8_C(  62), -INT8_C(  62) } },
6289     { -INT8_C(  94),
6290       { -INT8_C(  94), -INT8_C(  94), -INT8_C(  94), -INT8_C(  94), -INT8_C(  94), -INT8_C(  94), -INT8_C(  94), -INT8_C(  94),
6291         -INT8_C(  94), -INT8_C(  94), -INT8_C(  94), -INT8_C(  94), -INT8_C(  94), -INT8_C(  94), -INT8_C(  94), -INT8_C(  94) } },
6292     { -INT8_C(  11),
6293       { -INT8_C(  11), -INT8_C(  11), -INT8_C(  11), -INT8_C(  11), -INT8_C(  11), -INT8_C(  11), -INT8_C(  11), -INT8_C(  11),
6294         -INT8_C(  11), -INT8_C(  11), -INT8_C(  11), -INT8_C(  11), -INT8_C(  11), -INT8_C(  11), -INT8_C(  11), -INT8_C(  11) } },
6295     { -INT8_C(  57),
6296       { -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57),
6297         -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57), -INT8_C(  57) } },
6298     {  INT8_C(  73),
6299       {  INT8_C(  73),  INT8_C(  73),  INT8_C(  73),  INT8_C(  73),  INT8_C(  73),  INT8_C(  73),  INT8_C(  73),  INT8_C(  73),
6300          INT8_C(  73),  INT8_C(  73),  INT8_C(  73),  INT8_C(  73),  INT8_C(  73),  INT8_C(  73),  INT8_C(  73),  INT8_C(  73) } },
6301     {  INT8_C(  60),
6302       {  INT8_C(  60),  INT8_C(  60),  INT8_C(  60),  INT8_C(  60),  INT8_C(  60),  INT8_C(  60),  INT8_C(  60),  INT8_C(  60),
6303          INT8_C(  60),  INT8_C(  60),  INT8_C(  60),  INT8_C(  60),  INT8_C(  60),  INT8_C(  60),  INT8_C(  60),  INT8_C(  60) } },
6304     { -INT8_C(   6),
6305       { -INT8_C(   6), -INT8_C(   6), -INT8_C(   6), -INT8_C(   6), -INT8_C(   6), -INT8_C(   6), -INT8_C(   6), -INT8_C(   6),
6306         -INT8_C(   6), -INT8_C(   6), -INT8_C(   6), -INT8_C(   6), -INT8_C(   6), -INT8_C(   6), -INT8_C(   6), -INT8_C(   6) } }
6307   };
6308 
6309   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
6310     simde__m128i r = simde_mm_set1_epi8(test_vec[i].a);
6311     simde_test_x86_assert_equal_i8x16(r, simde_x_mm_loadu_epi8(test_vec[i].r));
6312   }
6313 
6314   return 0;
6315 }
6316 
6317 static int
6318 test_simde_mm_set1_epi16(SIMDE_MUNIT_TEST_ARGS) {
6319   const struct {
6320     int16_t a;
6321     simde__m128i r;
6322   } test_vec[8] = {
6323     { -22932,
6324       simde_mm_set_epi16(INT16_C(-22932), INT16_C(-22932), INT16_C(-22932), INT16_C(-22932),
6325                          INT16_C(-22932), INT16_C(-22932), INT16_C(-22932), INT16_C(-22932)) },
6326     {  23064,
6327       simde_mm_set_epi16(INT16_C( 23064), INT16_C( 23064), INT16_C( 23064), INT16_C( 23064),
6328                          INT16_C( 23064), INT16_C( 23064), INT16_C( 23064), INT16_C( 23064)) },
6329     {  29063,
6330       simde_mm_set_epi16(INT16_C( 29063), INT16_C( 29063), INT16_C( 29063), INT16_C( 29063),
6331                          INT16_C( 29063), INT16_C( 29063), INT16_C( 29063), INT16_C( 29063)) },
6332     {  -6254,
6333       simde_mm_set_epi16(INT16_C( -6254), INT16_C( -6254), INT16_C( -6254), INT16_C( -6254),
6334                          INT16_C( -6254), INT16_C( -6254), INT16_C( -6254), INT16_C( -6254)) },
6335     {  23328,
6336       simde_mm_set_epi16(INT16_C( 23328), INT16_C( 23328), INT16_C( 23328), INT16_C( 23328),
6337                          INT16_C( 23328), INT16_C( 23328), INT16_C( 23328), INT16_C( 23328)) },
6338     {  12202,
6339       simde_mm_set_epi16(INT16_C( 12202), INT16_C( 12202), INT16_C( 12202), INT16_C( 12202),
6340                          INT16_C( 12202), INT16_C( 12202), INT16_C( 12202), INT16_C( 12202)) },
6341     {  26711,
6342       simde_mm_set_epi16(INT16_C( 26711), INT16_C( 26711), INT16_C( 26711), INT16_C( 26711),
6343                          INT16_C( 26711), INT16_C( 26711), INT16_C( 26711), INT16_C( 26711)) },
6344     {  -9629,
6345       simde_mm_set_epi16(INT16_C( -9629), INT16_C( -9629), INT16_C( -9629), INT16_C( -9629),
6346                          INT16_C( -9629), INT16_C( -9629), INT16_C( -9629), INT16_C( -9629)) },
6347   };
6348 
6349   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6350     simde__m128i r = simde_mm_set1_epi16(test_vec[i].a);
6351     simde_assert_m128i_i16(r, ==, test_vec[i].r);
6352   }
6353 
6354   return 0;
6355 }
6356 
6357 static int
6358 test_simde_mm_set1_epi32 (SIMDE_MUNIT_TEST_ARGS) {
6359   static const struct {
6360     const int32_t a;
6361     const int32_t r[4];
6362   } test_vec[] = {
6363     { -INT32_C(  1379277210),
6364       { -INT32_C(  1379277210), -INT32_C(  1379277210), -INT32_C(  1379277210), -INT32_C(  1379277210) } },
6365     {  INT32_C(  1628685468),
6366       {  INT32_C(  1628685468),  INT32_C(  1628685468),  INT32_C(  1628685468),  INT32_C(  1628685468) } },
6367     {  INT32_C(  1687738541),
6368       {  INT32_C(  1687738541),  INT32_C(  1687738541),  INT32_C(  1687738541),  INT32_C(  1687738541) } },
6369     {  INT32_C(  1891425133),
6370       {  INT32_C(  1891425133),  INT32_C(  1891425133),  INT32_C(  1891425133),  INT32_C(  1891425133) } },
6371     {  INT32_C(  1695660386),
6372       {  INT32_C(  1695660386),  INT32_C(  1695660386),  INT32_C(  1695660386),  INT32_C(  1695660386) } },
6373     {  INT32_C(  1846447439),
6374       {  INT32_C(  1846447439),  INT32_C(  1846447439),  INT32_C(  1846447439),  INT32_C(  1846447439) } },
6375     {  INT32_C(   958687000),
6376       {  INT32_C(   958687000),  INT32_C(   958687000),  INT32_C(   958687000),  INT32_C(   958687000) } },
6377     { -INT32_C(  1238079408),
6378       { -INT32_C(  1238079408), -INT32_C(  1238079408), -INT32_C(  1238079408), -INT32_C(  1238079408) } }
6379   };
6380 
6381   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
6382     simde__m128i r = simde_mm_set1_epi32(test_vec[i].a);
6383     simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec[i].r));
6384   }
6385 
6386   return 0;
6387 }
6388 
6389 static int
6390 test_simde_mm_set1_pd(SIMDE_MUNIT_TEST_ARGS) {
6391   const struct {
6392     simde_float64 a;
6393     simde__m128d r;
6394   } test_vec[8] = {
6395     { SIMDE_FLOAT64_C(  922.45),
6396       simde_mm_set_pd(SIMDE_FLOAT64_C(  922.45), SIMDE_FLOAT64_C(  922.45)) },
6397     { SIMDE_FLOAT64_C( -599.83),
6398       simde_mm_set_pd(SIMDE_FLOAT64_C( -599.83), SIMDE_FLOAT64_C( -599.83)) },
6399     { SIMDE_FLOAT64_C( -398.06),
6400       simde_mm_set_pd(SIMDE_FLOAT64_C( -398.06), SIMDE_FLOAT64_C( -398.06)) },
6401     { SIMDE_FLOAT64_C(  758.75),
6402       simde_mm_set_pd(SIMDE_FLOAT64_C(  758.75), SIMDE_FLOAT64_C(  758.75)) },
6403     { SIMDE_FLOAT64_C( -273.82),
6404       simde_mm_set_pd(SIMDE_FLOAT64_C( -273.82), SIMDE_FLOAT64_C( -273.82)) },
6405     { SIMDE_FLOAT64_C( -320.64),
6406       simde_mm_set_pd(SIMDE_FLOAT64_C( -320.64), SIMDE_FLOAT64_C( -320.64)) },
6407     { SIMDE_FLOAT64_C(  627.18),
6408       simde_mm_set_pd(SIMDE_FLOAT64_C(  627.18), SIMDE_FLOAT64_C(  627.18)) },
6409     { SIMDE_FLOAT64_C(  433.85),
6410       simde_mm_set_pd(SIMDE_FLOAT64_C(  433.85), SIMDE_FLOAT64_C(  433.85)) }
6411   };
6412 
6413   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6414     simde__m128d r = simde_mm_set1_pd(test_vec[i].a);
6415     simde_assert_m128d_close(r, test_vec[i].r, 1);
6416   }
6417 
6418   return 0;
6419 }
6420 
6421 static int
6422 test_simde_mm_set_pd(SIMDE_MUNIT_TEST_ARGS) {
6423   const struct {
6424     double e0;
6425     double e1;
6426     simde__m128d r;
6427   } test_vec[8] = {
6428     { (1062807988.00), 4166063422.00,
6429       simde_mm_set_pd(SIMDE_FLOAT64_C(1062807988.00), SIMDE_FLOAT64_C(4166063422.00)) },
6430     { (4089462150.00), 3301875355.00,
6431       simde_mm_set_pd(SIMDE_FLOAT64_C(4089462150.00), SIMDE_FLOAT64_C(3301875355.00)) },
6432     { (2961047618.00), 1310362259.00,
6433       simde_mm_set_pd(SIMDE_FLOAT64_C(2961047618.00), SIMDE_FLOAT64_C(1310362259.00)) },
6434     { (491413403.00), 2980697460.00,
6435       simde_mm_set_pd(SIMDE_FLOAT64_C(491413403.00), SIMDE_FLOAT64_C(2980697460.00)) },
6436     { (3027292014.00), 1034055676.00,
6437       simde_mm_set_pd(SIMDE_FLOAT64_C(3027292014.00), SIMDE_FLOAT64_C(1034055676.00)) },
6438     { (133655993.00), 2416999239.00,
6439       simde_mm_set_pd(SIMDE_FLOAT64_C(133655993.00), SIMDE_FLOAT64_C(2416999239.00)) },
6440     { (2396615078.00), 517112175.00,
6441       simde_mm_set_pd(SIMDE_FLOAT64_C(2396615078.00), SIMDE_FLOAT64_C(517112175.00)) },
6442     { (628434760.00), 1544868779.00,
6443       simde_mm_set_pd(SIMDE_FLOAT64_C(628434760.00), SIMDE_FLOAT64_C(1544868779.00)) },
6444   };
6445 
6446   // printf("\n");
6447   // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
6448   //   double e0 = munit_rand_uint32() ;
6449   //   double e1 = munit_rand_uint32() ;
6450   //   simde__m128d_private r;
6451 
6452   //   r = simde__m128d_to_private(simde_mm_set_pd(e0, e1));
6453 
6454   //   printf("    { (%*.2f), %*.2f,\n", 8, e0, 8, e1);
6455   //   printf("      simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n", 8, r.f64[1], 8, r.f64[0]);
6456   // }
6457   // return MUNIT_FAIL;
6458 
6459   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6460     simde__m128d r = simde_mm_set_pd(test_vec[i].e0, test_vec[i].e1);
6461     simde_assert_m128d_close(r, test_vec[i].r, 1);
6462   }
6463 
6464   return 0;
6465 }
6466 
6467 static int
6468 test_simde_mm_set_pd1(SIMDE_MUNIT_TEST_ARGS) {
6469   const struct {
6470     double a;
6471     simde__m128d r;
6472   } test_vec[8] = {
6473     { (983122077.00),
6474       simde_mm_set_pd(SIMDE_FLOAT64_C(983122077.00), SIMDE_FLOAT64_C(983122077.00)) },
6475     { (2243688041.00),
6476       simde_mm_set_pd(SIMDE_FLOAT64_C(2243688041.00), SIMDE_FLOAT64_C(2243688041.00)) },
6477     { (1259032742.00),
6478       simde_mm_set_pd(SIMDE_FLOAT64_C(1259032742.00), SIMDE_FLOAT64_C(1259032742.00)) },
6479     { (945157531.00),
6480       simde_mm_set_pd(SIMDE_FLOAT64_C(945157531.00), SIMDE_FLOAT64_C(945157531.00)) },
6481     { (2547177525.00),
6482       simde_mm_set_pd(SIMDE_FLOAT64_C(2547177525.00), SIMDE_FLOAT64_C(2547177525.00)) },
6483     { (2112014239.00),
6484       simde_mm_set_pd(SIMDE_FLOAT64_C(2112014239.00), SIMDE_FLOAT64_C(2112014239.00)) },
6485     { (1570949017.00),
6486       simde_mm_set_pd(SIMDE_FLOAT64_C(1570949017.00), SIMDE_FLOAT64_C(1570949017.00)) },
6487     { (1215464208.00),
6488       simde_mm_set_pd(SIMDE_FLOAT64_C(1215464208.00), SIMDE_FLOAT64_C(1215464208.00)) },
6489   };
6490 
6491   // printf("\n");
6492   // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
6493   //   double a = munit_rand_uint32() ;
6494   //   simde__m128d_private r;
6495 
6496   //   r = simde__m128d_to_private(simde_mm_set_pd1(a));
6497 
6498   //   printf("    { (%*.2f),\n", 8, a);
6499   //   printf("      simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n", 8, r.f64[1], 8, r.f64[0]);
6500   // }
6501   // return MUNIT_FAIL;
6502 
6503   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6504     simde__m128d r = simde_mm_set_pd1(test_vec[i].a);
6505     simde_assert_m128d_close(r, test_vec[i].r, 1);
6506   }
6507 
6508   return 0;
6509 }
6510 
6511 
6512 static int
6513 test_simde_mm_set_sd(SIMDE_MUNIT_TEST_ARGS) {
6514   const struct {
6515     simde_float64 a;
6516     simde__m128d r;
6517   } test_vec[8] = {
6518     { SIMDE_FLOAT64_C( -222.00),
6519       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C( -222.00)) },
6520     { SIMDE_FLOAT64_C(  804.62),
6521       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(  804.62)) },
6522     { SIMDE_FLOAT64_C(  845.92),
6523       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(  845.92)) },
6524     { SIMDE_FLOAT64_C(  892.20),
6525       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(  892.20)) },
6526     { SIMDE_FLOAT64_C(  233.47),
6527       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(  233.47)) },
6528     { SIMDE_FLOAT64_C( -916.51),
6529       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C( -916.51)) },
6530     { SIMDE_FLOAT64_C(   -0.11),
6531       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C(   -0.11)) },
6532     { SIMDE_FLOAT64_C( -843.72),
6533       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.00), SIMDE_FLOAT64_C( -843.72)) }
6534   };
6535 
6536   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6537     simde__m128d r = simde_mm_set_sd(test_vec[i].a);
6538     simde_assert_m128d_close(r, test_vec[i].r, 1);
6539   }
6540 
6541   return 0;
6542 }
6543 
6544 static int
6545 test_simde_mm_set1_epi64(SIMDE_MUNIT_TEST_ARGS) {
6546   const struct {
6547     simde__m64 a;
6548     simde__m128i r;
6549   } test_vec[8] = {
6550     { simde_x_mm_set_pi64(INT64_C(5509445936599134262)),
6551       simde_mm_set_epi64x(INT64_C(5509445936599134262), INT64_C(5509445936599134262)) },
6552     { simde_x_mm_set_pi64(INT64_C(6533321325309895597)),
6553       simde_mm_set_epi64x(INT64_C(6533321325309895597), INT64_C(6533321325309895597)) },
6554     { simde_x_mm_set_pi64(INT64_C(8570268616515205604)),
6555       simde_mm_set_epi64x(INT64_C(8570268616515205604), INT64_C(8570268616515205604)) },
6556     { simde_x_mm_set_pi64(INT64_C(6893954556242409981)),
6557       simde_mm_set_epi64x(INT64_C(6893954556242409981), INT64_C(6893954556242409981)) },
6558     { simde_x_mm_set_pi64(INT64_C( 479685313418970755)),
6559       simde_mm_set_epi64x(INT64_C( 479685313418970755), INT64_C( 479685313418970755)) },
6560     { simde_x_mm_set_pi64(INT64_C(1310625044422752521)),
6561       simde_mm_set_epi64x(INT64_C(1310625044422752521), INT64_C(1310625044422752521)) },
6562     { simde_x_mm_set_pi64(INT64_C(-9181800088333422881)),
6563       simde_mm_set_epi64x(INT64_C(-9181800088333422881), INT64_C(-9181800088333422881)) },
6564     { simde_x_mm_set_pi64(INT64_C(-4247659939651135559)),
6565       simde_mm_set_epi64x(INT64_C(-4247659939651135559), INT64_C(-4247659939651135559)) }
6566   };
6567 
6568   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6569     simde__m128i r = simde_mm_set1_epi64(test_vec[i].a);
6570     simde_assert_m128i_i64(r, ==, test_vec[i].r);
6571   }
6572 
6573   return 0;
6574 }
6575 
6576 static int
6577 test_simde_mm_set1_epi64x(SIMDE_MUNIT_TEST_ARGS) {
6578   const struct {
6579     int64_t a;
6580     simde__m128i r;
6581   } test_vec[8] = {
6582     { INT64_C(-7342192307236287075),
6583       simde_mm_set_epi64x(INT64_C(-7342192307236287075), INT64_C(-7342192307236287075)) },
6584     { INT64_C(-8079223173243549940),
6585       simde_mm_set_epi64x(INT64_C(-8079223173243549940), INT64_C(-8079223173243549940)) },
6586     { INT64_C(8128959178680760661),
6587       simde_mm_set_epi64x(INT64_C(8128959178680760661), INT64_C(8128959178680760661)) },
6588     { INT64_C(6271233176655491948),
6589       simde_mm_set_epi64x(INT64_C(6271233176655491948), INT64_C(6271233176655491948)) },
6590     { INT64_C(3474926301195230116),
6591       simde_mm_set_epi64x(INT64_C(3474926301195230116), INT64_C(3474926301195230116)) },
6592     { INT64_C(-5217363481586450008),
6593       simde_mm_set_epi64x(INT64_C(-5217363481586450008), INT64_C(-5217363481586450008)) },
6594     { INT64_C(-7156667910834929798),
6595       simde_mm_set_epi64x(INT64_C(-7156667910834929798), INT64_C(-7156667910834929798)) },
6596     { INT64_C(8467790055770652882),
6597       simde_mm_set_epi64x(INT64_C(8467790055770652882), INT64_C(8467790055770652882)) }
6598   };
6599 
6600   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6601     simde__m128i r = simde_mm_set1_epi64x(test_vec[i].a);
6602     simde_assert_m128i_i64(r, ==, test_vec[i].r);
6603   }
6604 
6605   return 0;
6606 }
6607 
6608 static int
6609 test_simde_mm_setr_epi8(SIMDE_MUNIT_TEST_ARGS) {
6610   const struct {
6611     int8_t a[16];
6612     simde__m128i r;
6613   } test_vec[8] = {
6614     { { -117,  101,  -68,  -84,   57,  -16,   14,  112,
6615           -4,  -62,   47,  -17,   21,   25,  -74,   93 },
6616       simde_mm_set_epi8(INT8_C(  93), INT8_C( -74), INT8_C(  25), INT8_C(  21), INT8_C( -17), INT8_C(  47), INT8_C( -62), INT8_C(  -4),
6617                         INT8_C( 112), INT8_C(  14), INT8_C( -16), INT8_C(  57), INT8_C( -84), INT8_C( -68), INT8_C( 101), INT8_C(-117)) },
6618     { { -121,  -99,   93,   75,  -45,   61,  -29,   21,
6619           43,  -55, -114,  120,    9,  -25,  107,  106 },
6620       simde_mm_set_epi8(INT8_C( 106), INT8_C( 107), INT8_C( -25), INT8_C(   9), INT8_C( 120), INT8_C(-114), INT8_C( -55), INT8_C(  43),
6621                         INT8_C(  21), INT8_C( -29), INT8_C(  61), INT8_C( -45), INT8_C(  75), INT8_C(  93), INT8_C( -99), INT8_C(-121)) },
6622     { {   17,  120,   33,  -15,  -38,  -48,   75,  -19,
6623          105,  -73,  -87,   91,   57,  125,   70,   11 },
6624       simde_mm_set_epi8(INT8_C(  11), INT8_C(  70), INT8_C( 125), INT8_C(  57), INT8_C(  91), INT8_C( -87), INT8_C( -73), INT8_C( 105),
6625                         INT8_C( -19), INT8_C(  75), INT8_C( -48), INT8_C( -38), INT8_C( -15), INT8_C(  33), INT8_C( 120), INT8_C(  17)) },
6626     { {   56,  -40,   93,   54,    0, -115,  -62,    6,
6627           10,  -58,  -12,   31,  -96,   67,   12,   19 },
6628       simde_mm_set_epi8(INT8_C(  19), INT8_C(  12), INT8_C(  67), INT8_C( -96), INT8_C(  31), INT8_C( -12), INT8_C( -58), INT8_C(  10),
6629                         INT8_C(   6), INT8_C( -62), INT8_C(-115), INT8_C(   0), INT8_C(  54), INT8_C(  93), INT8_C( -40), INT8_C(  56)) },
6630     { {   37,  -21,   96,  -83,   46,  -81,  -51,  -14,
6631          127,   26,  -91,  -48,   45,  -55, -111,  109 },
6632       simde_mm_set_epi8(INT8_C( 109), INT8_C(-111), INT8_C( -55), INT8_C(  45), INT8_C( -48), INT8_C( -91), INT8_C(  26), INT8_C( 127),
6633                         INT8_C( -14), INT8_C( -51), INT8_C( -81), INT8_C(  46), INT8_C( -83), INT8_C(  96), INT8_C( -21), INT8_C(  37)) },
6634     { {  -77,   43,  114,  -94,  -36,  -86,  -18,   18,
6635           14,   -4,   99,   78,   44,   70,  105,  -91 },
6636       simde_mm_set_epi8(INT8_C( -91), INT8_C( 105), INT8_C(  70), INT8_C(  44), INT8_C(  78), INT8_C(  99), INT8_C(  -4), INT8_C(  14),
6637                         INT8_C(  18), INT8_C( -18), INT8_C( -86), INT8_C( -36), INT8_C( -94), INT8_C( 114), INT8_C(  43), INT8_C( -77)) },
6638     { {  125,  -73,  -25, -106,   -9,  112,  -96,   59,
6639           61,  -50,   73,  -71,   13,    0,  -64,  -15 },
6640       simde_mm_set_epi8(INT8_C( -15), INT8_C( -64), INT8_C(   0), INT8_C(  13), INT8_C( -71), INT8_C(  73), INT8_C( -50), INT8_C(  61),
6641                         INT8_C(  59), INT8_C( -96), INT8_C( 112), INT8_C(  -9), INT8_C(-106), INT8_C( -25), INT8_C( -73), INT8_C( 125)) },
6642     { {   76,   81,  -62,   21,   -3,   99,  -61,  126,
6643          -15,  -95,   99,  -34,   78,   36,   56,  -38 },
6644       simde_mm_set_epi8(INT8_C( -38), INT8_C(  56), INT8_C(  36), INT8_C(  78), INT8_C( -34), INT8_C(  99), INT8_C( -95), INT8_C( -15),
6645                         INT8_C( 126), INT8_C( -61), INT8_C(  99), INT8_C(  -3), INT8_C(  21), INT8_C( -62), INT8_C(  81), INT8_C(  76)) }
6646   };
6647 
6648   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6649     simde__m128i r =
6650       simde_mm_setr_epi8(test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3],
6651                          test_vec[i].a[ 4], test_vec[i].a[ 5], test_vec[i].a[ 6], test_vec[i].a[ 7],
6652                          test_vec[i].a[ 8], test_vec[i].a[ 9], test_vec[i].a[10], test_vec[i].a[11],
6653                          test_vec[i].a[12], test_vec[i].a[13], test_vec[i].a[14], test_vec[i].a[15]);
6654     simde_assert_m128i_i8(r, ==, test_vec[i].r);
6655   }
6656 
6657   return 0;
6658 }
6659 
6660 static int
6661 test_simde_mm_setr_epi16(SIMDE_MUNIT_TEST_ARGS) {
6662   const struct {
6663     int16_t a[8];
6664     simde__m128i r;
6665   } test_vec[8] = {
6666     { { -10562,  -1563,   3119,   8148, -20473,  28066,  19911,  32415 },
6667       simde_mm_set_epi16(INT16_C( 32415), INT16_C( 19911), INT16_C( 28066), INT16_C(-20473),
6668                          INT16_C(  8148), INT16_C(  3119), INT16_C( -1563), INT16_C(-10562)) },
6669     { {  -5842, -19524,  19809,  -4522, -18693, -13515,  10296, -11468 },
6670       simde_mm_set_epi16(INT16_C(-11468), INT16_C( 10296), INT16_C(-13515), INT16_C(-18693),
6671                          INT16_C( -4522), INT16_C( 19809), INT16_C(-19524), INT16_C( -5842)) },
6672     { {  21973, -10968, -22468,   4564,  15035,   4920,  15286,  10966 },
6673       simde_mm_set_epi16(INT16_C( 10966), INT16_C( 15286), INT16_C(  4920), INT16_C( 15035),
6674                          INT16_C(  4564), INT16_C(-22468), INT16_C(-10968), INT16_C( 21973)) },
6675     { { -30861,  17137,  12124,  23736,  -1854,  30822, -26631,  14095 },
6676       simde_mm_set_epi16(INT16_C( 14095), INT16_C(-26631), INT16_C( 30822), INT16_C( -1854),
6677                          INT16_C( 23736), INT16_C( 12124), INT16_C( 17137), INT16_C(-30861)) },
6678     { {  -8301, -14416, -32194,  -4341,   1212,  26290, -16654,  -9801 },
6679       simde_mm_set_epi16(INT16_C( -9801), INT16_C(-16654), INT16_C( 26290), INT16_C(  1212),
6680                          INT16_C( -4341), INT16_C(-32194), INT16_C(-14416), INT16_C( -8301)) },
6681     { {  -5842,  17831,    171,  10031,   7446,  23430,  -5408, -23387 },
6682       simde_mm_set_epi16(INT16_C(-23387), INT16_C( -5408), INT16_C( 23430), INT16_C(  7446),
6683                          INT16_C( 10031), INT16_C(   171), INT16_C( 17831), INT16_C( -5842)) },
6684     { {   3343, -24774,  -5050,  25934, -13848,  27661,  13484,  -5817 },
6685       simde_mm_set_epi16(INT16_C( -5817), INT16_C( 13484), INT16_C( 27661), INT16_C(-13848),
6686                          INT16_C( 25934), INT16_C( -5050), INT16_C(-24774), INT16_C(  3343)) },
6687     { {  27516, -24147, -18268,  10553,  12061, -22335,  29977, -25416 },
6688       simde_mm_set_epi16(INT16_C(-25416), INT16_C( 29977), INT16_C(-22335), INT16_C( 12061),
6689                          INT16_C( 10553), INT16_C(-18268), INT16_C(-24147), INT16_C( 27516)) }
6690   };
6691 
6692   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6693     simde__m128i r =
6694       simde_mm_setr_epi16(test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3],
6695                           test_vec[i].a[ 4], test_vec[i].a[ 5], test_vec[i].a[ 6], test_vec[i].a[ 7]);
6696     simde_assert_m128i_i16(r, ==, test_vec[i].r);
6697   }
6698 
6699   return 0;
6700 }
6701 
6702 static int
6703 test_simde_mm_setr_epi32(SIMDE_MUNIT_TEST_ARGS) {
6704   const struct {
6705     int32_t a[4];
6706     simde__m128i r;
6707   } test_vec[8] = {
6708     { { INT32_C(  576930619), INT32_C(-1056617076), INT32_C( 1391020156), INT32_C( -119436850) },
6709       simde_mm_set_epi32(INT32_C( -119436850), INT32_C( 1391020156), INT32_C(-1056617076), INT32_C(  576930619)) },
6710     { { INT32_C(-2038323421), INT32_C(-1916700674), INT32_C( 1438851519), INT32_C( 1990196695) },
6711       simde_mm_set_epi32(INT32_C( 1990196695), INT32_C( 1438851519), INT32_C(-1916700674), INT32_C(-2038323421)) },
6712     { { INT32_C( 1146758814), INT32_C(  625179194), INT32_C(-1226824864), INT32_C(-1523319395) },
6713       simde_mm_set_epi32(INT32_C(-1523319395), INT32_C(-1226824864), INT32_C(  625179194), INT32_C( 1146758814)) },
6714     { { INT32_C( -276839793), INT32_C( 1178530072), INT32_C(-1956542830), INT32_C( -556652843) },
6715       simde_mm_set_epi32(INT32_C( -556652843), INT32_C(-1956542830), INT32_C( 1178530072), INT32_C( -276839793)) },
6716     { { INT32_C(-1720519476), INT32_C(  147115658), INT32_C(  736217848), INT32_C(-1149123643) },
6717       simde_mm_set_epi32(INT32_C(-1149123643), INT32_C(  736217848), INT32_C(  147115658), INT32_C(-1720519476)) },
6718     { { INT32_C( 1888725856), INT32_C( -696349459), INT32_C(-1872984731), INT32_C( 1198325431) },
6719       simde_mm_set_epi32(INT32_C( 1198325431), INT32_C(-1872984731), INT32_C( -696349459), INT32_C( 1888725856)) },
6720     { { INT32_C( 1999809110), INT32_C( -469856594), INT32_C(-1721902839), INT32_C(-1910021155) },
6721       simde_mm_set_epi32(INT32_C(-1910021155), INT32_C(-1721902839), INT32_C( -469856594), INT32_C( 1999809110)) },
6722     { { INT32_C(   57396463), INT32_C(-1219624618), INT32_C( -492678555), INT32_C(-1751286944) },
6723       simde_mm_set_epi32(INT32_C(-1751286944), INT32_C( -492678555), INT32_C(-1219624618), INT32_C(   57396463)) }
6724   };
6725 
6726   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6727     simde__m128i r = simde_mm_setr_epi32(test_vec[i].a[ 0], test_vec[i].a[ 1], test_vec[i].a[ 2], test_vec[i].a[ 3]);
6728     simde_assert_m128i_i32(r, ==, test_vec[i].r);
6729   }
6730 
6731   return 0;
6732 }
6733 
6734 static int
6735 test_simde_mm_setr_epi64(SIMDE_MUNIT_TEST_ARGS) {
6736   const struct {
6737     simde__m64 e1;
6738     simde__m64 e0;
6739     simde__m128i r;
6740   } test_vec[8] = {
6741     { simde_mm_cvtsi64_m64(INT64_C(-4101257248168872649)),
6742       simde_mm_cvtsi64_m64(INT64_C(-2723834683478465794)),
6743       simde_mm_set_epi64x(INT64_C(-2723834683478465794), INT64_C(-4101257248168872649)) },
6744     { simde_mm_cvtsi64_m64(INT64_C(-2051996013747413745)),
6745       simde_mm_cvtsi64_m64(INT64_C(-3184937756541660331)),
6746       simde_mm_set_epi64x(INT64_C(-3184937756541660331), INT64_C(-2051996013747413745)) },
6747     { simde_mm_cvtsi64_m64(INT64_C(-1223296052051875883)),
6748       simde_mm_cvtsi64_m64(INT64_C( 3027248353112135930)),
6749       simde_mm_set_epi64x(INT64_C( 3027248353112135930), INT64_C(-1223296052051875883)) },
6750     { simde_mm_cvtsi64_m64(INT64_C(-8279962275226206621)),
6751       simde_mm_cvtsi64_m64(INT64_C(-2814925648380381958)),
6752       simde_mm_set_epi64x(INT64_C(-2814925648380381958), INT64_C(-8279962275226206621)) },
6753     { simde_mm_cvtsi64_m64(INT64_C( 6755033167475904984)),
6754       simde_mm_cvtsi64_m64(INT64_C(-8685825248847164354)),
6755       simde_mm_set_epi64x(INT64_C(-8685825248847164354), INT64_C( 6755033167475904984)) },
6756     { simde_mm_cvtsi64_m64(INT64_C( 1859833649283237251)),
6757       simde_mm_cvtsi64_m64(INT64_C( 4744285272371342192)),
6758       simde_mm_set_epi64x(INT64_C( 4744285272371342192), INT64_C( 1859833649283237251)) },
6759     { simde_mm_cvtsi64_m64(INT64_C(-2932310525767688549)),
6760       simde_mm_cvtsi64_m64(INT64_C(-5821145293930307405)),
6761       simde_mm_set_epi64x(INT64_C(-5821145293930307405), INT64_C(-2932310525767688549)) },
6762     { simde_mm_cvtsi64_m64(INT64_C( 6748921357249852483)),
6763       simde_mm_cvtsi64_m64(INT64_C(-4633625703225321444)),
6764       simde_mm_set_epi64x(INT64_C(-4633625703225321444), INT64_C( 6748921357249852483)) },
6765   };
6766 
6767   // printf("\n");
6768   // for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6769   //   simde__m64_private a, b;
6770   //   simde__m128i_private r;
6771 
6772   //   munit_rand_memory(sizeof(a), (uint8_t*) &a);
6773   //   munit_rand_memory(sizeof(b), (uint8_t*) &b);
6774 
6775   //   r = simde__m128i_to_private(simde_mm_setr_epi64(simde__m64_from_private(a), simde__m64_from_private(b)));
6776 
6777   //   printf("    { simde_mm_cvtsi64_m64(INT64_C(%20" PRId64 ")),\n", a.i64[0]);
6778   //   printf("      simde_mm_cvtsi64_m64(INT64_C(%20" PRId64 ")),\n", b.i64[0]);
6779   //   printf("      simde_mm_set_epi64x(INT64_C(%20" PRId64 "), INT64_C(%20" PRId64 ")) },\n", r.i64[1], r.i64[0]);
6780   // }
6781   // return MUNIT_FAIL;
6782 
6783   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6784     simde__m128i r = simde_mm_setr_epi64(test_vec[i].e1, test_vec[i].e0);
6785     simde_assert_m128i_i64(r, ==, test_vec[i].r);
6786   }
6787 
6788   return 0;
6789 }
6790 
6791 static int
6792 test_simde_mm_setzero_pd(SIMDE_MUNIT_TEST_ARGS) {
6793   simde__m128d a = simde_mm_set1_pd(0);
6794   simde__m128d r = simde_mm_setzero_pd();
6795 
6796   simde_assert_m128d_equal(a, r);
6797 
6798   return 0;
6799 }
6800 
6801 static int
6802 test_simde_mm_setzero_si128(SIMDE_MUNIT_TEST_ARGS) {
6803   simde__m128i a = simde_mm_set1_epi32(0);
6804   simde__m128i r = simde_mm_setzero_si128();
6805 
6806   simde_assert_m128i_i32(a, ==, r);
6807 
6808   return 0;
6809 }
6810 
6811 static int
6812 test_simde_mm_shuffle_epi32(SIMDE_MUNIT_TEST_ARGS) {
6813   const struct {
6814     simde__m128i a;
6815     simde__m128i r;
6816   } test_vec[8] = {
6817     { simde_mm_set_epi32(INT32_C( -749480461), INT32_C(-1872761030), INT32_C( 1690143325), INT32_C( -258848374)),
6818       simde_mm_set_epi32(INT32_C(-1872761030), INT32_C(-1872761030), INT32_C(-1872761030), INT32_C(-1872761030)) },
6819     { simde_mm_set_epi32(INT32_C( 1030695986), INT32_C( 1932252260), INT32_C( 1962976759), INT32_C(-1621624916)),
6820       simde_mm_set_epi32(INT32_C( 1932252260), INT32_C( 1932252260), INT32_C( 1932252260), INT32_C( 1932252260)) },
6821     { simde_mm_set_epi32(INT32_C( -897180326), INT32_C( 1675136548), INT32_C( 1746269378), INT32_C( 1984702409)),
6822       simde_mm_set_epi32(INT32_C( 1675136548), INT32_C( 1675136548), INT32_C( 1675136548), INT32_C( 1675136548)) },
6823     { simde_mm_set_epi32(INT32_C(  -11612835), INT32_C(-1878653813), INT32_C(-2135957543), INT32_C( -134555953)),
6824       simde_mm_set_epi32(INT32_C(-1878653813), INT32_C(-1878653813), INT32_C(-1878653813), INT32_C(-1878653813)) },
6825     { simde_mm_set_epi32(INT32_C( 1051337342), INT32_C(  755742115), INT32_C(  338927136), INT32_C( 1410014436)),
6826       simde_mm_set_epi32(INT32_C(  755742115), INT32_C(  755742115), INT32_C(  755742115), INT32_C(  755742115)) },
6827     { simde_mm_set_epi32(INT32_C(-1826960183), INT32_C( -119444047), INT32_C(-1224980361), INT32_C( 1323381864)),
6828       simde_mm_set_epi32(INT32_C( -119444047), INT32_C( -119444047), INT32_C( -119444047), INT32_C( -119444047)) },
6829     { simde_mm_set_epi32(INT32_C( 1256541920), INT32_C( 1446192699), INT32_C( -117794523), INT32_C(-1904270778)),
6830       simde_mm_set_epi32(INT32_C( 1446192699), INT32_C( 1446192699), INT32_C( 1446192699), INT32_C( 1446192699)) },
6831     { simde_mm_set_epi32(INT32_C(  542509546), INT32_C(-1970305999), INT32_C(-1492486994), INT32_C( 1078541043)),
6832       simde_mm_set_epi32(INT32_C(-1970305999), INT32_C(-1970305999), INT32_C(-1970305999), INT32_C(-1970305999)) }
6833   };
6834 
6835   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6836     simde__m128i r = simde_mm_shuffle_epi32(test_vec[i].a, 0xaa);
6837     simde_assert_m128i_i32(r, ==, test_vec[i].r);
6838   }
6839 
6840   return 0;
6841 }
6842 
6843 static int
6844 test_simde_mm_setr_pd(SIMDE_MUNIT_TEST_ARGS) {
6845   const struct {
6846     double e1;
6847     double e0;
6848     simde__m128d r;
6849   } test_vec[8] = {
6850     {     0.74,     0.57,
6851       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.57), SIMDE_FLOAT64_C(    0.74)) },
6852     {     0.52,     0.66,
6853       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.66), SIMDE_FLOAT64_C(    0.52)) },
6854     {     0.54,     0.56,
6855       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.56), SIMDE_FLOAT64_C(    0.54)) },
6856     {     0.95,     0.43,
6857       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.43), SIMDE_FLOAT64_C(    0.95)) },
6858     {     0.53,     0.46,
6859       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.46), SIMDE_FLOAT64_C(    0.53)) },
6860     {     0.33,     0.39,
6861       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.39), SIMDE_FLOAT64_C(    0.33)) },
6862     {     0.48,     0.63,
6863       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.63), SIMDE_FLOAT64_C(    0.48)) },
6864     {     0.08,     0.44,
6865       simde_mm_set_pd(SIMDE_FLOAT64_C(    0.44), SIMDE_FLOAT64_C(    0.08)) },
6866   };
6867 
6868   // printf("\n");
6869   // for (size_t i = 0 ; i < (sizeof(test_vec) / (sizeof(test_vec[0]))) ; i++) {
6870   //   double e1 = munit_rand_double();
6871   //   double e0 = munit_rand_double();
6872   //   simde__m128d_private r;
6873 
6874   //   r = simde__m128d_to_private(simde_mm_setr_pd(e1, e0));
6875 
6876   //   printf("    { %*.2f, %*.2f,\n", 8, e1 , 8, e0);
6877   //   printf("      simde_mm_set_pd(SIMDE_FLOAT64_C(%*.2f), SIMDE_FLOAT64_C(%*.2f)) },\n", 8, r.f64[1], 8, r.f64[0]);
6878   // }
6879   // return MUNIT_FAIL;
6880 
6881   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6882     simde__m128d r = simde_mm_setr_pd(test_vec[i].e1, test_vec[i].e0);
6883     simde_assert_m128d_close(r, test_vec[i].r, 1);
6884   }
6885 
6886   return 0;
6887 }
6888 
6889 static int
6890 test_simde_mm_shuffle_pd(SIMDE_MUNIT_TEST_ARGS) {
6891   const struct {
6892     simde__m128d a;
6893     simde__m128d b;
6894     simde__m128d r;
6895   } test_vec[8] = {
6896     { simde_mm_set_pd(SIMDE_FLOAT64_C(  897.05), SIMDE_FLOAT64_C(  524.15)),
6897       simde_mm_set_pd(SIMDE_FLOAT64_C( -346.39), SIMDE_FLOAT64_C( -595.93)),
6898       simde_mm_set_pd(SIMDE_FLOAT64_C( -595.93), SIMDE_FLOAT64_C(  524.15)) },
6899     { simde_mm_set_pd(SIMDE_FLOAT64_C( -684.88), SIMDE_FLOAT64_C(   62.45)),
6900       simde_mm_set_pd(SIMDE_FLOAT64_C(  765.70), SIMDE_FLOAT64_C( -126.52)),
6901       simde_mm_set_pd(SIMDE_FLOAT64_C( -126.52), SIMDE_FLOAT64_C(   62.45)) },
6902     { simde_mm_set_pd(SIMDE_FLOAT64_C( -871.69), SIMDE_FLOAT64_C( -753.55)),
6903       simde_mm_set_pd(SIMDE_FLOAT64_C( -923.31), SIMDE_FLOAT64_C( -103.97)),
6904       simde_mm_set_pd(SIMDE_FLOAT64_C( -103.97), SIMDE_FLOAT64_C( -753.55)) },
6905     { simde_mm_set_pd(SIMDE_FLOAT64_C( -377.03), SIMDE_FLOAT64_C(  701.23)),
6906       simde_mm_set_pd(SIMDE_FLOAT64_C( -672.47), SIMDE_FLOAT64_C( -328.63)),
6907       simde_mm_set_pd(SIMDE_FLOAT64_C( -328.63), SIMDE_FLOAT64_C(  701.23)) },
6908     { simde_mm_set_pd(SIMDE_FLOAT64_C(  238.70), SIMDE_FLOAT64_C(  837.56)),
6909       simde_mm_set_pd(SIMDE_FLOAT64_C( -429.19), SIMDE_FLOAT64_C(  106.67)),
6910       simde_mm_set_pd(SIMDE_FLOAT64_C(  106.67), SIMDE_FLOAT64_C(  837.56)) },
6911     { simde_mm_set_pd(SIMDE_FLOAT64_C(  571.83), SIMDE_FLOAT64_C( -389.51)),
6912       simde_mm_set_pd(SIMDE_FLOAT64_C(  447.48), SIMDE_FLOAT64_C(   -8.02)),
6913       simde_mm_set_pd(SIMDE_FLOAT64_C(   -8.02), SIMDE_FLOAT64_C( -389.51)) },
6914     { simde_mm_set_pd(SIMDE_FLOAT64_C( -214.27), SIMDE_FLOAT64_C(  549.07)),
6915       simde_mm_set_pd(SIMDE_FLOAT64_C( -967.02), SIMDE_FLOAT64_C( -162.29)),
6916       simde_mm_set_pd(SIMDE_FLOAT64_C( -162.29), SIMDE_FLOAT64_C(  549.07)) },
6917     { simde_mm_set_pd(SIMDE_FLOAT64_C( -528.33), SIMDE_FLOAT64_C(  376.34)),
6918       simde_mm_set_pd(SIMDE_FLOAT64_C( -959.95), SIMDE_FLOAT64_C( -855.93)),
6919       simde_mm_set_pd(SIMDE_FLOAT64_C( -855.93), SIMDE_FLOAT64_C(  376.34)) }
6920   };
6921 
6922   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6923     simde__m128d r = simde_mm_shuffle_pd(test_vec[i].a, test_vec[i].b, 0);
6924     simde_assert_m128d_close(r, test_vec[i].r, 1);
6925   }
6926 
6927   return 0;
6928 }
6929 
6930 static int
6931 test_simde_mm_shufflehi_epi16(SIMDE_MUNIT_TEST_ARGS) {
6932   const struct {
6933     simde__m128i a;
6934     simde__m128i r;
6935   } test_vec[8] = {
6936     { simde_mm_set_epi16(INT16_C(  3588), INT16_C(-23598), INT16_C( -2669), INT16_C( -7880),
6937                          INT16_C( 20391), INT16_C( 13327), INT16_C( 18868), INT16_C( 31239)),
6938       simde_mm_set_epi16(INT16_C(  3588), INT16_C(  3588), INT16_C( -2669), INT16_C( -2669),
6939                          INT16_C( 20391), INT16_C( 13327), INT16_C( 18868), INT16_C( 31239)) },
6940     { simde_mm_set_epi16(INT16_C(  5701), INT16_C( 15357), INT16_C( 27973), INT16_C(-26447),
6941                          INT16_C(-18797), INT16_C(-27249), INT16_C( -9707), INT16_C( -1950)),
6942       simde_mm_set_epi16(INT16_C(  5701), INT16_C(  5701), INT16_C( 27973), INT16_C( 27973),
6943                          INT16_C(-18797), INT16_C(-27249), INT16_C( -9707), INT16_C( -1950)) },
6944     { simde_mm_set_epi16(INT16_C(-14544), INT16_C( 26887), INT16_C( -7591), INT16_C( 22567),
6945                          INT16_C( -8366), INT16_C(-11381), INT16_C(  1736), INT16_C(-23069)),
6946       simde_mm_set_epi16(INT16_C(-14544), INT16_C(-14544), INT16_C( -7591), INT16_C( -7591),
6947                          INT16_C( -8366), INT16_C(-11381), INT16_C(  1736), INT16_C(-23069)) },
6948     { simde_mm_set_epi16(INT16_C( 31637), INT16_C( 12965), INT16_C(-23234), INT16_C(-12784),
6949                          INT16_C(   364), INT16_C(  7338), INT16_C( 16998), INT16_C(-14384)),
6950       simde_mm_set_epi16(INT16_C( 31637), INT16_C( 31637), INT16_C(-23234), INT16_C(-23234),
6951                          INT16_C(   364), INT16_C(  7338), INT16_C( 16998), INT16_C(-14384)) },
6952     { simde_mm_set_epi16(INT16_C( 20104), INT16_C(-31033), INT16_C( 12782), INT16_C( -8281),
6953                          INT16_C( 17249), INT16_C( -1757), INT16_C(-22510), INT16_C(-23902)),
6954       simde_mm_set_epi16(INT16_C( 20104), INT16_C( 20104), INT16_C( 12782), INT16_C( 12782),
6955                          INT16_C( 17249), INT16_C( -1757), INT16_C(-22510), INT16_C(-23902)) },
6956     { simde_mm_set_epi16(INT16_C( 28403), INT16_C(-26721), INT16_C( -6834), INT16_C(-28104),
6957                          INT16_C( -6404), INT16_C( -5723), INT16_C(-30154), INT16_C( -4442)),
6958       simde_mm_set_epi16(INT16_C( 28403), INT16_C( 28403), INT16_C( -6834), INT16_C( -6834),
6959                          INT16_C( -6404), INT16_C( -5723), INT16_C(-30154), INT16_C( -4442)) },
6960     { simde_mm_set_epi16(INT16_C( 18671), INT16_C( -6207), INT16_C( 14078), INT16_C(-30976),
6961                          INT16_C(-25644), INT16_C(-24126), INT16_C( 10939), INT16_C(-13801)),
6962       simde_mm_set_epi16(INT16_C( 18671), INT16_C( 18671), INT16_C( 14078), INT16_C( 14078),
6963                          INT16_C(-25644), INT16_C(-24126), INT16_C( 10939), INT16_C(-13801)) },
6964     { simde_mm_set_epi16(INT16_C(-28546), INT16_C( 12696), INT16_C(-10401), INT16_C( -8517),
6965                          INT16_C( 29702), INT16_C(-10694), INT16_C( 25940), INT16_C( 28112)),
6966       simde_mm_set_epi16(INT16_C(-28546), INT16_C(-28546), INT16_C(-10401), INT16_C(-10401),
6967                          INT16_C( 29702), INT16_C(-10694), INT16_C( 25940), INT16_C( 28112)) }
6968   };
6969 
6970   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
6971     simde__m128i r = simde_mm_shufflehi_epi16(test_vec[i].a, 245);
6972     simde_assert_m128i_i16(r, ==, test_vec[i].r);
6973   }
6974 
6975   return 0;
6976 }
6977 
6978 static int
6979 test_simde_mm_shufflelo_epi16(SIMDE_MUNIT_TEST_ARGS) {
6980   simde__m128i a, r, e;
6981 
6982   a = simde_mm_set_epi16(INT16_C(-24821), INT16_C(-30256), INT16_C(  8570), INT16_C( 11360),
6983                           INT16_C(-20759), INT16_C(-23279), INT16_C(  9158), INT16_C( -6205));
6984   e = simde_mm_set_epi16(INT16_C(-24821), INT16_C(-30256), INT16_C(  8570), INT16_C( 11360),
6985                           INT16_C( -6205), INT16_C(-23279), INT16_C(  9158), INT16_C(-20759));
6986   r = simde_mm_shufflelo_epi16(a,  39);
6987   simde_assert_m128i_i16(r, ==, e);
6988 
6989   a = simde_mm_set_epi16(INT16_C(-26644), INT16_C( -8695), INT16_C( -9741), INT16_C(-14158),
6990                           INT16_C( -3323), INT16_C(  7181), INT16_C( 10186), INT16_C(-16906));
6991   e = simde_mm_set_epi16(INT16_C(-26644), INT16_C( -8695), INT16_C( -9741), INT16_C(-14158),
6992                           INT16_C(  7181), INT16_C(-16906), INT16_C( 10186), INT16_C( -3323));
6993   r = simde_mm_shufflelo_epi16(a, 135);
6994   simde_assert_m128i_i16(r, ==, e);
6995 
6996   a = simde_mm_set_epi16(INT16_C(-20225), INT16_C( 19920), INT16_C( -3607), INT16_C( 11889),
6997                           INT16_C( 12271), INT16_C(-20589), INT16_C( 17338), INT16_C( -7507));
6998   e = simde_mm_set_epi16(INT16_C(-20225), INT16_C( 19920), INT16_C( -3607), INT16_C( 11889),
6999                           INT16_C( 12271), INT16_C( 17338), INT16_C( 12271), INT16_C( -7507));
7000   r = simde_mm_shufflelo_epi16(a, 220);
7001   simde_assert_m128i_i16(r, ==, e);
7002 
7003   a = simde_mm_set_epi16(INT16_C( -8042), INT16_C(-18261), INT16_C( 20990), INT16_C(-18752),
7004                           INT16_C( 26566), INT16_C(-27202), INT16_C( -3939), INT16_C( -1274));
7005   e = simde_mm_set_epi16(INT16_C( -8042), INT16_C(-18261), INT16_C( 20990), INT16_C(-18752),
7006                           INT16_C( -3939), INT16_C( 26566), INT16_C( -3939), INT16_C( -1274));
7007   r = simde_mm_shufflelo_epi16(a, 116);
7008   simde_assert_m128i_i16(r, ==, e);
7009 
7010   a = simde_mm_set_epi16(INT16_C(  5383), INT16_C(-27918), INT16_C( 16559), INT16_C(-31608),
7011                           INT16_C(  6504), INT16_C(-11225), INT16_C(-13396), INT16_C( 20261));
7012   e = simde_mm_set_epi16(INT16_C(  5383), INT16_C(-27918), INT16_C( 16559), INT16_C(-31608),
7013                           INT16_C(-13396), INT16_C(-13396), INT16_C(-13396), INT16_C(-13396));
7014   r = simde_mm_shufflelo_epi16(a,  85);
7015   simde_assert_m128i_i16(r, ==, e);
7016 
7017   a = simde_mm_set_epi16(INT16_C( -8905), INT16_C( 30480), INT16_C( 20250), INT16_C(    30),
7018                           INT16_C( 24188), INT16_C( 21861), INT16_C( -9955), INT16_C(  6282));
7019   e = simde_mm_set_epi16(INT16_C( -8905), INT16_C( 30480), INT16_C( 20250), INT16_C(    30),
7020                           INT16_C(  6282), INT16_C( 24188), INT16_C( 21861), INT16_C( 21861));
7021   r = simde_mm_shufflelo_epi16(a,  58);
7022   simde_assert_m128i_i16(r, ==, e);
7023 
7024   a = simde_mm_set_epi16(INT16_C(  7654), INT16_C(  4685), INT16_C( 25749), INT16_C(-30088),
7025                           INT16_C( -7783), INT16_C( 10182), INT16_C( 23640), INT16_C(  4937));
7026   e = simde_mm_set_epi16(INT16_C(  7654), INT16_C(  4685), INT16_C( 25749), INT16_C(-30088),
7027                           INT16_C( -7783), INT16_C(  4937), INT16_C( 23640), INT16_C(  4937));
7028   r = simde_mm_shufflelo_epi16(a, 196);
7029   simde_assert_m128i_i16(r, ==, e);
7030 
7031   a = simde_mm_set_epi16(INT16_C(-26752), INT16_C(  9125), INT16_C(-14825), INT16_C( 13732),
7032                           INT16_C( 15859), INT16_C(-32053), INT16_C(-12419), INT16_C( 17722));
7033   e = simde_mm_set_epi16(INT16_C(-26752), INT16_C(  9125), INT16_C(-14825), INT16_C( 13732),
7034                           INT16_C( 15859), INT16_C( 17722), INT16_C(-12419), INT16_C(-32053));
7035   r = simde_mm_shufflelo_epi16(a, 198);
7036   simde_assert_m128i_i16(r, ==, e);
7037 
7038   return 0;
7039 }
7040 
7041 static int
7042 test_simde_mm_sra_epi16(SIMDE_MUNIT_TEST_ARGS) {
7043   const struct {
7044     simde__m128i a;
7045     simde__m128i count;
7046     simde__m128i r;
7047   } test_vec[8] = {
7048     { simde_mm_set_epi16(INT16_C( 28258), INT16_C(  1159), INT16_C( 20634), INT16_C(-30158),
7049                          INT16_C( 10049), INT16_C(-31721), INT16_C(-26691), INT16_C(-28181)),
7050       simde_mm_set_epi16(INT16_C(    11), INT16_C(     6), INT16_C(    10), INT16_C(     8),
7051                          INT16_C(    15), INT16_C(     3), INT16_C(     8), INT16_C(     1)),
7052       simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(    -1),
7053                          INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(    -1)) },
7054     { simde_mm_set_epi16(INT16_C(-25682), INT16_C(  7964), INT16_C(  1259), INT16_C( 18017),
7055                          INT16_C( 10765), INT16_C(-10649), INT16_C( -9400), INT16_C( 12110)),
7056       simde_mm_set_epi16(INT16_C(     3), INT16_C(     8), INT16_C(     3), INT16_C(     2),
7057                          INT16_C(    10), INT16_C(     5), INT16_C(     2), INT16_C(     2)),
7058       simde_mm_set_epi16(INT16_C(    -1), INT16_C(     0), INT16_C(     0), INT16_C(     0),
7059                          INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(     0)) },
7060     { simde_mm_set_epi16(INT16_C(-24685), INT16_C( 14370), INT16_C( 13079), INT16_C( -6409),
7061                          INT16_C(-18776), INT16_C( 20941), INT16_C( 22692), INT16_C(   312)),
7062       simde_mm_set_epi16(INT16_C(    13), INT16_C(    13), INT16_C(     0), INT16_C(     7),
7063                          INT16_C(     4), INT16_C(    10), INT16_C(    15), INT16_C(    11)),
7064       simde_mm_set_epi16(INT16_C(    -1), INT16_C(     0), INT16_C(     0), INT16_C(    -1),
7065                          INT16_C(    -1), INT16_C(     0), INT16_C(     0), INT16_C(     0)) },
7066     { simde_mm_set_epi16(INT16_C( 13442), INT16_C(-32489), INT16_C(-21378), INT16_C( 10156),
7067                          INT16_C( 15393), INT16_C( 20131), INT16_C( 15138), INT16_C(-12589)),
7068       simde_mm_set_epi16(INT16_C(     1), INT16_C(     3), INT16_C(     1), INT16_C(     8),
7069                          INT16_C(    12), INT16_C(     3), INT16_C(     4), INT16_C(     2)),
7070       simde_mm_set_epi16(INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(     0),
7071                          INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(    -1)) },
7072     { simde_mm_set_epi16(INT16_C( -9561), INT16_C( 25554), INT16_C( -5305), INT16_C( -7173),
7073                          INT16_C(-10064), INT16_C( 31075), INT16_C( 30218), INT16_C(-18929)),
7074       simde_mm_set_epi16(INT16_C(     5), INT16_C(    10), INT16_C(     8), INT16_C(     1),
7075                          INT16_C(    12), INT16_C(     3), INT16_C(    10), INT16_C(    10)),
7076       simde_mm_set_epi16(INT16_C(    -1), INT16_C(     0), INT16_C(    -1), INT16_C(    -1),
7077                          INT16_C(    -1), INT16_C(     0), INT16_C(     0), INT16_C(    -1)) },
7078     { simde_mm_set_epi16(INT16_C( 14091), INT16_C( 24202), INT16_C( -8543), INT16_C( -7482),
7079                          INT16_C(-26143), INT16_C( 20277), INT16_C(-27984), INT16_C(-32658)),
7080       simde_mm_set_epi16(INT16_C(     5), INT16_C(     7), INT16_C(     6), INT16_C(     6),
7081                          INT16_C(    10), INT16_C(     3), INT16_C(     7), INT16_C(    11)),
7082       simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(    -1), INT16_C(    -1),
7083                          INT16_C(    -1), INT16_C(     0), INT16_C(    -1), INT16_C(    -1)) },
7084     { simde_mm_set_epi16(INT16_C(-19726), INT16_C( 12311), INT16_C( 16279), INT16_C( -6277),
7085                          INT16_C( 19874), INT16_C(-27089), INT16_C( 14524), INT16_C(-14305)),
7086       simde_mm_set_epi16(INT16_C(     7), INT16_C(     7), INT16_C(     3), INT16_C(     1),
7087                          INT16_C(    12), INT16_C(     1), INT16_C(    10), INT16_C(     9)),
7088       simde_mm_set_epi16(INT16_C(    -1), INT16_C(     0), INT16_C(     0), INT16_C(    -1),
7089                          INT16_C(     0), INT16_C(    -1), INT16_C(     0), INT16_C(    -1)) },
7090     { simde_mm_set_epi16(INT16_C( 19801), INT16_C(-12786), INT16_C( 31632), INT16_C( 19030),
7091                          INT16_C(-19420), INT16_C(-12406), INT16_C( 12426), INT16_C( 27612)),
7092       simde_mm_set_epi16(INT16_C(     2), INT16_C(     0), INT16_C(    10), INT16_C(     3),
7093                          INT16_C(     9), INT16_C(     0), INT16_C(     1), INT16_C(     8)),
7094       simde_mm_set_epi16(INT16_C(     0), INT16_C(    -1), INT16_C(     0), INT16_C(     0),
7095                          INT16_C(    -1), INT16_C(    -1), INT16_C(     0), INT16_C(     0)) }
7096   };
7097 
7098   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7099     simde__m128i r = simde_mm_sra_epi16(test_vec[i].a, test_vec[i].count);
7100     simde_assert_m128i_i16(r, ==, test_vec[i].r);
7101   }
7102 
7103   return 0;
7104 }
7105 
7106 static int
7107 test_simde_mm_sll_epi16(SIMDE_MUNIT_TEST_ARGS) {
7108   const struct {
7109     simde__m128i a;
7110     simde__m128i count;
7111     simde__m128i r;
7112   } test_vec[] = {
7113     { simde_mm_set_epi16(INT16_C(-11777), INT16_C( 26803), INT16_C(-29366), INT16_C(-28135),
7114                          INT16_C( 26578), INT16_C(-22566), INT16_C(-18521), INT16_C( -1087)),
7115       simde_mm_set_epi64x(INT64_C(-1766274549416496901), ~INT64_C(0)),
7116       simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0),
7117                          INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0)) },
7118     { simde_mm_set_epi16(INT16_C( 20694), INT16_C(-29451), INT16_C(-14684), INT16_C( 26977),
7119                          INT16_C(  9617), INT16_C(  4798), INT16_C(  6081), INT16_C( 26243)),
7120       simde_mm_set_epi64x(INT64_C(4317315664183993059), INT64_C(                  0)),
7121       simde_mm_set_epi16(INT16_C( 20694), INT16_C(-29451), INT16_C(-14684), INT16_C( 26977),
7122                          INT16_C(  9617), INT16_C(  4798), INT16_C(  6081), INT16_C( 26243)) },
7123     { simde_mm_set_epi16(INT16_C(-19602), INT16_C(-30869), INT16_C( -4506), INT16_C(  7721),
7124                          INT16_C( 10990), INT16_C(-12116), INT16_C( 29998), INT16_C(  -194)),
7125       simde_mm_set_epi64x(INT64_C(5323917981768999693), INT64_C(                  1)),
7126       simde_mm_set_epi16(INT16_C( 26332), INT16_C(  3798), INT16_C( -9012), INT16_C( 15442),
7127                          INT16_C( 21980), INT16_C(-24232), INT16_C( -5540), INT16_C(  -388)) },
7128     { simde_mm_set_epi16(INT16_C( -7669), INT16_C(-27334), INT16_C( 24496), INT16_C( 27065),
7129                          INT16_C( 13859), INT16_C(  2295), INT16_C( 31737), INT16_C( -2884)),
7130       simde_mm_set_epi64x(INT64_C(4743197663988711830), INT64_C(                  2)),
7131       simde_mm_set_epi16(INT16_C(-30676), INT16_C( 21736), INT16_C( 32448), INT16_C(-22812),
7132                          INT16_C(-10100), INT16_C(  9180), INT16_C( -4124), INT16_C(-11536)) },
7133     { simde_mm_set_epi16(INT16_C( -8360), INT16_C( 29662), INT16_C(  6226), INT16_C( 10396),
7134                          INT16_C(-32749), INT16_C( 20802), INT16_C( 12391), INT16_C(  4472)),
7135       simde_mm_set_epi64x(INT64_C(-4440768506472940517), INT64_C(                  3)),
7136       simde_mm_set_epi16(INT16_C( -1344), INT16_C(-24848), INT16_C(-15728), INT16_C( 17632),
7137                          INT16_C(   152), INT16_C(-30192), INT16_C(-31944), INT16_C(-29760)) },
7138     { simde_mm_set_epi16(INT16_C( 26979), INT16_C(  -773), INT16_C( 29656), INT16_C( 12973),
7139                          INT16_C(-28581), INT16_C( -1290), INT16_C( 25294), INT16_C(  -882)),
7140       simde_mm_set_epi64x(INT64_C(-8434753600973098893), INT64_C(                  4)),
7141       simde_mm_set_epi16(INT16_C(-27088), INT16_C(-12368), INT16_C( 15744), INT16_C( 10960),
7142                          INT16_C(  1456), INT16_C(-20640), INT16_C( 11488), INT16_C(-14112)) },
7143     { simde_mm_set_epi16(INT16_C(-20013), INT16_C( 14301), INT16_C(-17775), INT16_C(-12493),
7144                          INT16_C(-22187), INT16_C( -2203), INT16_C( 22935), INT16_C( -5230)),
7145       simde_mm_set_epi64x(INT64_C(-718166367052449426), INT64_C(                 13)),
7146       simde_mm_set_epi16(INT16_C( 24576), INT16_C(-24576), INT16_C(  8192), INT16_C( 24576),
7147                          INT16_C(-24576), INT16_C(-24576), INT16_C( -8192), INT16_C( 16384)) },
7148     { simde_mm_set_epi16(INT16_C( -9377), INT16_C(-13109), INT16_C(  2614), INT16_C(-17099),
7149                          INT16_C(-13260), INT16_C( 21790), INT16_C(  8183), INT16_C( 12820)),
7150       simde_mm_set_epi64x(INT64_C(-3082182550035776352), INT64_C(                 14)),
7151       simde_mm_set_epi16(INT16_C(-16384), INT16_C(-16384), INT16_C(-32768), INT16_C( 16384),
7152                          INT16_C(     0), INT16_C(-32768), INT16_C(-16384), INT16_C(     0)) },
7153     { simde_mm_set_epi16(INT16_C( 21339), INT16_C(-22944), INT16_C( 30792), INT16_C(-23288),
7154                          INT16_C(-13340), INT16_C(  7657), INT16_C(  8339), INT16_C( 10093)),
7155       simde_mm_set_epi64x(INT64_C(-8360903661682410487), INT64_C(                 15)),
7156       simde_mm_set_epi16(INT16_C(-32768), INT16_C(     0), INT16_C(     0), INT16_C(     0),
7157                          INT16_C(     0), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) },
7158     { simde_mm_set_epi16(INT16_C(-12198), INT16_C(  1510), INT16_C( -3241), INT16_C(-10552),
7159                          INT16_C(-10041), INT16_C( 23083), INT16_C( 11931), INT16_C( 10037)),
7160       simde_mm_set_epi64x(INT64_C(7382630779200792207), INT64_C(                 16)),
7161       simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0),
7162                          INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0)) },
7163     { simde_mm_set_epi16(INT16_C( -4565), INT16_C(-19321), INT16_C( 29437), INT16_C( -8916),
7164                          INT16_C( 18870), INT16_C(-29403), INT16_C(   667), INT16_C(-22848)),
7165       simde_mm_set_epi64x(INT64_C(   7403670930710815), INT64_C(                 17)),
7166       simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0),
7167                          INT16_C(     0), INT16_C(     0), INT16_C(     0), INT16_C(     0)) }
7168   };
7169 
7170   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7171     simde__m128i r = simde_mm_sll_epi16(test_vec[i].a, test_vec[i].count);
7172     simde_assert_m128i_i16(r, ==, test_vec[i].r);
7173   }
7174 
7175   return 0;
7176 }
7177 
7178 static int
7179 test_simde_mm_sll_epi32(SIMDE_MUNIT_TEST_ARGS) {
7180   const struct {
7181     simde__m128i a;
7182     simde__m128i count;
7183     simde__m128i r;
7184   } test_vec[50] = {
7185     { simde_mm_set_epi32(INT32_C( 1847585989), INT32_C( -535718080), INT32_C(-1279093253), INT32_C(  656800013)),
7186       simde_x_mm_set_epu64x(UINT64_C( 2450913859380011969), UINT64_C(18446744073709551615)),
7187       simde_mm_set_epi32(INT32_C(          0), INT32_C(          0), INT32_C(          0), INT32_C(          0)) },
7188     { simde_mm_set_epi32(INT32_C(  894927109), INT32_C(  930856884), INT32_C(  350764320), INT32_C(  435252602)),
7189       simde_x_mm_set_epu64x(UINT64_C( 4964670149549210828), UINT64_C(                   0)),
7190       simde_mm_set_epi32(INT32_C(  894927109), INT32_C(  930856884), INT32_C(  350764320), INT32_C(  435252602)) },
7191     { simde_mm_set_epi32(INT32_C( -264520264), INT32_C( 2022498436), INT32_C(-1437067245), INT32_C(  482847980)),
7192       simde_x_mm_set_epu64x(UINT64_C( 7326459959939805716), UINT64_C(                   1)),
7193       simde_mm_set_epi32(INT32_C( -529040528), INT32_C( -249970424), INT32_C( 1420832806), INT32_C(  965695960)) },
7194     { simde_mm_set_epi32(INT32_C(  -73269821), INT32_C(-1137239147), INT32_C(  168132057), INT32_C( -131743227)),
7195       simde_x_mm_set_epu64x(UINT64_C( 1477135654656320870), UINT64_C(                   2)),
7196       simde_mm_set_epi32(INT32_C( -293079284), INT32_C( -253989292), INT32_C(  672528228), INT32_C( -526972908)) },
7197     { simde_mm_set_epi32(INT32_C(  676475770), INT32_C(  743649739), INT32_C( 1613393787), INT32_C(  257685631)),
7198       simde_x_mm_set_epu64x(UINT64_C(14989079754060836033), UINT64_C(                   3)),
7199       simde_mm_set_epi32(INT32_C( 1116838864), INT32_C( 1654230616), INT32_C(   22248408), INT32_C( 2061485048)) },
7200     { simde_mm_set_epi32(INT32_C( 1293905571), INT32_C(-1134008712), INT32_C(-1835354706), INT32_C( -173430307)),
7201       simde_x_mm_set_epu64x(UINT64_C(15716033284919086785), UINT64_C(                  29)),
7202       simde_mm_set_epi32(INT32_C( 1610612736), INT32_C(          0), INT32_C(-1073741824), INT32_C(-1610612736)) },
7203     { simde_mm_set_epi32(INT32_C(-1608827194), INT32_C( -758406839), INT32_C(-1895836042), INT32_C(-1122971027)),
7204       simde_x_mm_set_epu64x(UINT64_C(  240001894519477005), UINT64_C(                  30)),
7205       simde_mm_set_epi32(          INT32_MIN , INT32_C( 1073741824),           INT32_MIN , INT32_C( 1073741824)) },
7206     { simde_mm_set_epi32(INT32_C( 1629035853), INT32_C(  172553194), INT32_C(  533866060), INT32_C(  504662481)),
7207       simde_x_mm_set_epu64x(UINT64_C(16117634661514065169), UINT64_C(                  31)),
7208       simde_mm_set_epi32(          INT32_MIN , INT32_C(          0), INT32_C(          0),           INT32_MIN ) },
7209     { simde_mm_set_epi32(INT32_C(-1841013582), INT32_C(-1759681954), INT32_C(-1933278842), INT32_C( 1138123852)),
7210       simde_x_mm_set_epu64x(UINT64_C(16122278597987411920), UINT64_C(                  32)),
7211       simde_mm_set_epi32(INT32_C(          0), INT32_C(          0), INT32_C(          0), INT32_C(          0)) },
7212     { simde_mm_set_epi32(INT32_C( 1016164793), INT32_C(  934378122), INT32_C( 1851284098), INT32_C(  118468072)),
7213       simde_x_mm_set_epu64x(UINT64_C( 9847102169886565139), UINT64_C(                  33)),
7214       simde_mm_set_epi32(INT32_C(          0), INT32_C(          0), INT32_C(          0), INT32_C(          0)) }
7215   };
7216 
7217   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7218     simde__m128i r = simde_mm_sll_epi32(test_vec[i].a, test_vec[i].count);
7219     simde_assert_m128i_i32(r, ==, test_vec[i].r);
7220   }
7221 
7222   return 0;
7223 }
7224 
7225 static int
7226 test_simde_mm_sll_epi64(SIMDE_MUNIT_TEST_ARGS) {
7227   const struct {
7228     simde__m128i a;
7229     simde__m128i count;
7230     simde__m128i r;
7231   } test_vec[8] = {
7232     { simde_mm_set_epi64x(INT64_C( 8055788268748421105), INT64_C(-9066834056558614160)),
7233       simde_mm_set_epi64x(INT64_C(-5262793264663215472), INT64_C(16)),
7234       simde_mm_set_epi64x(INT64_C(-1675408870841712640), INT64_C( 2483371706739064832)) },
7235     { simde_mm_set_epi64x(INT64_C( 2441732847819780871), INT64_C( -124127278813603777)),
7236       simde_mm_set_epi64x(INT64_C(-8018169735231443299), INT64_C( 1)),
7237       simde_mm_set_epi64x(INT64_C( 4883465695639561742), INT64_C( -248254557627207554)) },
7238     { simde_mm_set_epi64x(INT64_C(-2211386688605493428), INT64_C( -350563182553241755)),
7239       simde_mm_set_epi64x(INT64_C( 1150552132815785095), INT64_C(12)),
7240       simde_mm_set_epi64x(INT64_C( -488536336711237632), INT64_C( 2939242011266797568)) },
7241     { simde_mm_set_epi64x(INT64_C( 2987527187015640759), INT64_C(  638426944527652749)),
7242       simde_mm_set_epi64x(INT64_C(-1714103729784977145), INT64_C( 5)),
7243       simde_mm_set_epi64x(INT64_C( 3367149615952746208), INT64_C( 1982918151175336352)) },
7244     { simde_mm_set_epi64x(INT64_C( 4972525455608644218), INT64_C( 6137457836149854777)),
7245       simde_mm_set_epi64x(INT64_C(-8922909725876665702), INT64_C( 2)),
7246       simde_mm_set_epi64x(INT64_C( 1443357748725025256), INT64_C( 6103087270889867492)) },
7247     { simde_mm_set_epi64x(INT64_C(-6484089245702098359), INT64_C(  413459708861121590)),
7248       simde_mm_set_epi64x(INT64_C( 7011241116916112587), INT64_C(15)),
7249       simde_mm_set_epi64x(INT64_C(-1038162179743514624), INT64_C( 8337589858421374976)) },
7250     { simde_mm_set_epi64x(INT64_C(-1797418312522800237), INT64_C( 3481510514608785630)),
7251       simde_mm_set_epi64x(INT64_C( 4951339001913100627), INT64_C(13)),
7252       simde_mm_set_epi64x(INT64_C(-3949045366557351936), INT64_C( 1867797720205082624)) },
7253     { simde_mm_set_epi64x(INT64_C( 7626804351806608498), INT64_C(-4244380112569402483)),
7254       simde_mm_set_epi64x(INT64_C( 1577848631857250403), INT64_C( 4)),
7255       simde_mm_set_epi64x(INT64_C(-7098338887061125344), INT64_C( 5876894493727766736)) }
7256   };
7257 
7258   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7259     simde__m128i r = simde_mm_sll_epi64(test_vec[i].a, test_vec[i].count);
7260     simde_assert_m128i_i64(r, ==, test_vec[i].r);
7261   }
7262 
7263   return 0;
7264 }
7265 
7266 static int
7267 test_simde_mm_sqrt_pd (SIMDE_MUNIT_TEST_ARGS) {
7268   static const struct {
7269     const simde_float64 a[2];
7270     const simde_float64 r[2];
7271   } test_vec[] = {
7272     { { SIMDE_FLOAT64_C(   481.04), SIMDE_FLOAT64_C(   845.64) },
7273       { SIMDE_FLOAT64_C(    21.93), SIMDE_FLOAT64_C(    29.08) } },
7274     { { SIMDE_FLOAT64_C(   520.60), SIMDE_FLOAT64_C(   759.12) },
7275       { SIMDE_FLOAT64_C(    22.82), SIMDE_FLOAT64_C(    27.55) } },
7276     { { SIMDE_FLOAT64_C(    35.64), SIMDE_FLOAT64_C(   486.89) },
7277       { SIMDE_FLOAT64_C(     5.97), SIMDE_FLOAT64_C(    22.07) } },
7278     { { SIMDE_FLOAT64_C(   -79.78), SIMDE_FLOAT64_C(   723.70) },
7279       {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(    26.90) } },
7280     { { SIMDE_FLOAT64_C(   719.24), SIMDE_FLOAT64_C(   373.08) },
7281       { SIMDE_FLOAT64_C(    26.82), SIMDE_FLOAT64_C(    19.32) } },
7282     { { SIMDE_FLOAT64_C(   497.67), SIMDE_FLOAT64_C(   489.69) },
7283       { SIMDE_FLOAT64_C(    22.31), SIMDE_FLOAT64_C(    22.13) } },
7284     { { SIMDE_FLOAT64_C(   925.51), SIMDE_FLOAT64_C(   932.27) },
7285       { SIMDE_FLOAT64_C(    30.42), SIMDE_FLOAT64_C(    30.53) } },
7286     { { SIMDE_FLOAT64_C(   -49.82), SIMDE_FLOAT64_C(   705.12) },
7287       {             SIMDE_MATH_NAN, SIMDE_FLOAT64_C(    26.55) } }
7288   };
7289 
7290   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
7291     simde__m128d a = simde_mm_loadu_pd(test_vec[i].a);
7292     simde__m128d r = simde_mm_sqrt_pd(a);
7293     simde_test_x86_assert_equal_f64x2(r, simde_mm_loadu_pd(test_vec[i].r), 1);
7294   }
7295 
7296   return 0;
7297 }
7298 
7299 static int
7300 test_simde_mm_sqrt_sd(SIMDE_MUNIT_TEST_ARGS) {
7301   const struct {
7302     simde__m128d a;
7303     simde__m128d b;
7304     simde__m128d r;
7305   } test_vec[8] = {
7306     { simde_mm_set_pd(SIMDE_FLOAT64_C(    1.82), SIMDE_FLOAT64_C(  868.47)),
7307       simde_mm_set_pd(SIMDE_FLOAT64_C(  180.11), SIMDE_FLOAT64_C(  621.52)),
7308       simde_mm_set_pd(SIMDE_FLOAT64_C(    1.82), SIMDE_FLOAT64_C(   24.93)) },
7309     { simde_mm_set_pd(SIMDE_FLOAT64_C(  458.20), SIMDE_FLOAT64_C(  211.55)),
7310       simde_mm_set_pd(SIMDE_FLOAT64_C(  430.02), SIMDE_FLOAT64_C(  152.28)),
7311       simde_mm_set_pd(SIMDE_FLOAT64_C(  458.20), SIMDE_FLOAT64_C(   12.34)) },
7312     { simde_mm_set_pd(SIMDE_FLOAT64_C(  790.70), SIMDE_FLOAT64_C(  272.49)),
7313       simde_mm_set_pd(SIMDE_FLOAT64_C(  882.78), SIMDE_FLOAT64_C(  929.30)),
7314       simde_mm_set_pd(SIMDE_FLOAT64_C(  790.70), SIMDE_FLOAT64_C(   30.48)) },
7315     { simde_mm_set_pd(SIMDE_FLOAT64_C(  103.00), SIMDE_FLOAT64_C(   65.43)),
7316       simde_mm_set_pd(SIMDE_FLOAT64_C(  542.46), SIMDE_FLOAT64_C(  784.04)),
7317       simde_mm_set_pd(SIMDE_FLOAT64_C(  103.00), SIMDE_FLOAT64_C(   28.00)) },
7318     { simde_mm_set_pd(SIMDE_FLOAT64_C(  373.53), SIMDE_FLOAT64_C(  698.61)),
7319       simde_mm_set_pd(SIMDE_FLOAT64_C(  142.54), SIMDE_FLOAT64_C(  348.23)),
7320       simde_mm_set_pd(SIMDE_FLOAT64_C(  373.53), SIMDE_FLOAT64_C(   18.66)) },
7321     { simde_mm_set_pd(SIMDE_FLOAT64_C(  528.07), SIMDE_FLOAT64_C(  477.87)),
7322       simde_mm_set_pd(SIMDE_FLOAT64_C(  384.87), SIMDE_FLOAT64_C(  433.33)),
7323       simde_mm_set_pd(SIMDE_FLOAT64_C(  528.07), SIMDE_FLOAT64_C(   20.82)) },
7324     { simde_mm_set_pd(SIMDE_FLOAT64_C(  241.09), SIMDE_FLOAT64_C(  679.09)),
7325       simde_mm_set_pd(SIMDE_FLOAT64_C(  322.35), SIMDE_FLOAT64_C(  620.04)),
7326       simde_mm_set_pd(SIMDE_FLOAT64_C(  241.09), SIMDE_FLOAT64_C(   24.90)) },
7327     { simde_mm_set_pd(SIMDE_FLOAT64_C(  651.18), SIMDE_FLOAT64_C(  446.59)),
7328       simde_mm_set_pd(SIMDE_FLOAT64_C(  886.36), SIMDE_FLOAT64_C(  269.28)),
7329       simde_mm_set_pd(SIMDE_FLOAT64_C(  651.18), SIMDE_FLOAT64_C(   16.41)) }
7330   };
7331 
7332   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7333     simde__m128d r = simde_mm_sqrt_sd(test_vec[i].a, test_vec[i].b);
7334     simde_assert_m128d_close(r, test_vec[i].r, 1);
7335   }
7336 
7337   return 0;
7338 }
7339 
7340 static int
7341 test_simde_mm_srl_epi16(SIMDE_MUNIT_TEST_ARGS) {
7342   const struct {
7343     simde__m128i a;
7344     simde__m128i count;
7345     simde__m128i r;
7346   } test_vec[8] = {
7347     { simde_mm_set_epi16(INT16_C(  1445), INT16_C( 14472), INT16_C(-18508), INT16_C( -4645),
7348                          INT16_C(-24581), INT16_C(-12656), INT16_C(  1275), INT16_C(-25245)),
7349       simde_mm_set_epi64x(INT64_C( 4678230141678036905), INT64_C( 4)),
7350       simde_mm_set_epi16(INT16_C(    90), INT16_C(   904), INT16_C(  2939), INT16_C(  3805),
7351                          INT16_C(  2559), INT16_C(  3305), INT16_C(    79), INT16_C(  2518)) },
7352     { simde_mm_set_epi16(INT16_C(   986), INT16_C( 31796), INT16_C(-12770), INT16_C(-28401),
7353                          INT16_C( 15186), INT16_C(-17595), INT16_C( 31992), INT16_C( 19329)),
7354       simde_mm_set_epi64x(INT64_C(  234386534661459961), INT64_C( 2)),
7355       simde_mm_set_epi16(INT16_C(   246), INT16_C(  7949), INT16_C( 13191), INT16_C(  9283),
7356                          INT16_C(  3796), INT16_C( 11985), INT16_C(  7998), INT16_C(  4832)) },
7357     { simde_mm_set_epi16(INT16_C(-23898), INT16_C(  7158), INT16_C( 21829), INT16_C(-16536),
7358                          INT16_C(  2052), INT16_C( -6635), INT16_C( 18408), INT16_C( -3755)),
7359       simde_mm_set_epi64x(INT64_C( 8276161762185938564), INT64_C( 7)),
7360       simde_mm_set_epi16(INT16_C(   325), INT16_C(    55), INT16_C(   170), INT16_C(   382),
7361                          INT16_C(    16), INT16_C(   460), INT16_C(   143), INT16_C(   482)) },
7362     { simde_mm_set_epi16(INT16_C(-19513), INT16_C(-10508), INT16_C(-12500), INT16_C( 22379),
7363                          INT16_C(  4775), INT16_C(  8063), INT16_C(  8132), INT16_C(  7840)),
7364       simde_mm_set_epi64x(INT64_C( 1101003055866698034), INT64_C( 6)),
7365       simde_mm_set_epi16(INT16_C(   719), INT16_C(   859), INT16_C(   828), INT16_C(   349),
7366                          INT16_C(    74), INT16_C(   125), INT16_C(   127), INT16_C(   122)) },
7367     { simde_mm_set_epi16(INT16_C(  9942), INT16_C( 29561), INT16_C( -4121), INT16_C(-26882),
7368                          INT16_C(-17939), INT16_C( 13186), INT16_C(  6796), INT16_C( 14206)),
7369       simde_mm_set_epi64x(INT64_C(  735258903315099979), INT64_C( 1)),
7370       simde_mm_set_epi16(INT16_C(  4971), INT16_C( 14780), INT16_C( 30707), INT16_C( 19327),
7371                          INT16_C( 23798), INT16_C(  6593), INT16_C(  3398), INT16_C(  7103)) },
7372     { simde_mm_set_epi16(INT16_C(  5648), INT16_C(-13469), INT16_C(-23201), INT16_C(  7029),
7373                          INT16_C(-28211), INT16_C(-14496), INT16_C( 31202), INT16_C(-32095)),
7374       simde_mm_set_epi64x(INT64_C( 4870695400140482879), INT64_C(13)),
7375       simde_mm_set_epi16(INT16_C(     0), INT16_C(     6), INT16_C(     5), INT16_C(     0),
7376                          INT16_C(     4), INT16_C(     6), INT16_C(     3), INT16_C(     4)) },
7377     { simde_mm_set_epi16(INT16_C( 11526), INT16_C( 20336), INT16_C( 18003), INT16_C( 21727),
7378                          INT16_C(-28471), INT16_C(-32732), INT16_C(-25472), INT16_C( 12636)),
7379       simde_mm_set_epi64x(INT64_C(-6737308052137237000), INT64_C( 3)),
7380       simde_mm_set_epi16(INT16_C(  1440), INT16_C(  2542), INT16_C(  2250), INT16_C(  2715),
7381                          INT16_C(  4633), INT16_C(  4100), INT16_C(  5008), INT16_C(  1579)) },
7382     { simde_mm_set_epi16(INT16_C(-30386), INT16_C( -2761), INT16_C( 11467), INT16_C(  9929),
7383                          INT16_C(-19380), INT16_C(-12818), INT16_C( -4584), INT16_C( -6145)),
7384       simde_mm_set_epi64x(INT64_C(-2450775638354168945), INT64_C( 3)),
7385       simde_mm_set_epi16(INT16_C(  4393), INT16_C(  7846), INT16_C(  1433), INT16_C(  1241),
7386                          INT16_C(  5769), INT16_C(  6589), INT16_C(  7619), INT16_C(  7423)) }
7387   };
7388 
7389   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7390     simde__m128i r = simde_mm_srl_epi16(test_vec[i].a, test_vec[i].count);
7391     simde_assert_m128i_i16(r, ==, test_vec[i].r);
7392   }
7393 
7394   return 0;
7395 }
7396 
7397 static int
7398 test_simde_mm_srl_epi32(SIMDE_MUNIT_TEST_ARGS) {
7399   const struct {
7400     simde__m128i a;
7401     simde__m128i count;
7402     simde__m128i r;
7403   } test_vec[8] = {
7404     { simde_mm_set_epi32(INT32_C(   94713992), INT32_C(-1212879397), INT32_C(-1610887536), INT32_C(   83598691)),
7405       simde_mm_set_epi64x(INT64_C( 4678230141678036905), INT64_C( 4)),
7406       simde_mm_set_epi32(INT32_C(    5919624), INT32_C(  192630493), INT32_C(  167754985), INT32_C(    5224918)) },
7407     { simde_mm_set_epi32(INT32_C(   64650292), INT32_C( -836857585), INT32_C(  995277637), INT32_C( 2096647041)),
7408       simde_mm_set_epi64x(INT64_C(  234386534661459961), INT64_C( 2)),
7409       simde_mm_set_epi32(INT32_C(   16162573), INT32_C(  864527427), INT32_C(  248819409), INT32_C(  524161760)) },
7410     { simde_mm_set_epi32(INT32_C(-1566172170), INT32_C( 1430634344), INT32_C(  134538773), INT32_C( 1206448469)),
7411       simde_mm_set_epi64x(INT64_C( 8276161762185938564), INT64_C( 7)),
7412       simde_mm_set_epi32(INT32_C(   21318711), INT32_C(   11176830), INT32_C(    1051084), INT32_C(    9425378)) },
7413     { simde_mm_set_epi32(INT32_C(-1278748940), INT32_C( -819177621), INT32_C(  312942463), INT32_C(  532946592)),
7414       simde_mm_set_epi64x(INT64_C( 1101003055866698034), INT64_C( 6)),
7415       simde_mm_set_epi32(INT32_C(   47128411), INT32_C(   54309213), INT32_C(    4889725), INT32_C(    8327290)) },
7416     { simde_mm_set_epi32(INT32_C(  651588473), INT32_C( -270035202), INT32_C(-1175637118), INT32_C(  445396862)),
7417       simde_mm_set_epi64x(INT64_C(  735258903315099979), INT64_C( 1)),
7418       simde_mm_set_epi32(INT32_C(  325794236), INT32_C( 2012466047), INT32_C( 1559665089), INT32_C(  222698431)) },
7419     { simde_mm_set_epi32(INT32_C(  370199395), INT32_C(-1520493707), INT32_C(-1848785056), INT32_C( 2044887713)),
7420       simde_mm_set_epi64x(INT64_C( 4870695400140482879), INT64_C(13)),
7421       simde_mm_set_epi32(INT32_C(      45190), INT32_C(     338680), INT32_C(     298606), INT32_C(     249620)) },
7422     { simde_mm_set_epi32(INT32_C(  755388272), INT32_C( 1179866335), INT32_C(-1865842652), INT32_C(-1669320356)),
7423       simde_mm_set_epi64x(INT64_C(-6737308052137237000), INT64_C( 3)),
7424       simde_mm_set_epi32(INT32_C(   94423534), INT32_C(  147483291), INT32_C(  303640580), INT32_C(  328205867)) },
7425     { simde_mm_set_epi32(INT32_C(-1991314121), INT32_C(  751511241), INT32_C(-1270034962), INT32_C( -300357633)),
7426       simde_mm_set_epi64x(INT64_C(-2450775638354168945), INT64_C( 3)),
7427       simde_mm_set_epi32(INT32_C(  287956646), INT32_C(   93938905), INT32_C(  378116541), INT32_C(  499326207)) }
7428   };
7429 
7430   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7431     simde__m128i r = simde_mm_srl_epi32(test_vec[i].a, test_vec[i].count);
7432     simde_assert_m128i_i32(r, ==, test_vec[i].r);
7433   }
7434 
7435   return 0;
7436 }
7437 
7438 static int
7439 test_simde_mm_srl_epi64(SIMDE_MUNIT_TEST_ARGS) {
7440   const struct {
7441     simde__m128i a;
7442     simde__m128i count;
7443     simde__m128i r;
7444   } test_vec[8] = {
7445     { simde_mm_set_epi64x(INT64_C(  406793501195693531), INT64_C(-6918709284570423965)),
7446       simde_mm_set_epi64x(INT64_C( 4678230141678036905), INT64_C( 4)),
7447       simde_mm_set_epi64x(INT64_C(   25424593824730845), INT64_C(  720502174321195478)) },
7448     { simde_mm_set_epi64x(INT64_C(  277670893274960143), INT64_C( 4274684903451806593)),
7449       simde_mm_set_epi64x(INT64_C(  234386534661459961), INT64_C( 2)),
7450       simde_mm_set_epi64x(INT64_C(   69417723318740035), INT64_C( 1068671225862951648)) },
7451     { simde_mm_set_epi64x(INT64_C(-6726658248624717976), INT64_C(  577839631285416277)),
7452       simde_mm_set_epi64x(INT64_C( 8276161762185938564), INT64_C( 7)),
7453       simde_mm_set_epi64x(INT64_C(   91563170508475262), INT64_C(    4514372119417314)) },
7454     { simde_mm_set_epi64x(INT64_C(-5492184873618876565), INT64_C( 1344077644647636640)),
7455       simde_mm_set_epi64x(INT64_C( 1101003055866698034), INT64_C( 6)),
7456       simde_mm_set_epi64x(INT64_C(  202414987501416797), INT64_C(   21001213197619322)) },
7457     { simde_mm_set_epi64x(INT64_C( 2798551186010511102), INT64_C(-5049322973328296066)),
7458       simde_mm_set_epi64x(INT64_C(  735258903315099979), INT64_C( 1)),
7459       simde_mm_set_epi64x(INT64_C( 1399275593005255551), INT64_C( 6698710550190627775)) },
7460     { simde_mm_set_epi64x(INT64_C( 1589994297298459509), INT64_C(-7940471350808640863)),
7461       simde_mm_set_epi64x(INT64_C( 4870695400140482879), INT64_C(13)),
7462       simde_mm_set_epi64x(INT64_C(     194091100744440), INT64_C(    1282503994494740)) },
7463     { simde_mm_set_epi64x(INT64_C( 3244367925201818847), INT64_C(-8013733167196262052)),
7464       simde_mm_set_epi64x(INT64_C(-6737308052137237000), INT64_C( 3)),
7465       simde_mm_set_epi64x(INT64_C(  405545990650227355), INT64_C( 1304126363314161195)) },
7466     { simde_mm_set_epi64x(INT64_C(-8552629025006475575), INT64_C(-5454758622571993089)),
7467       simde_mm_set_epi64x(INT64_C(-2450775638354168945), INT64_C( 3)),
7468       simde_mm_set_epi64x(INT64_C( 1236764381087884505), INT64_C( 1623998181392194815)) }
7469   };
7470 
7471   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7472     simde__m128i r = simde_mm_srl_epi64(test_vec[i].a, test_vec[i].count);
7473     simde_assert_m128i_i64(r, ==, test_vec[i].r);
7474   }
7475 
7476   return 0;
7477 }
7478 
7479 static int
7480 test_simde_mm_sra_epi32(SIMDE_MUNIT_TEST_ARGS) {
7481   const struct {
7482     simde__m128i a;
7483     simde__m128i count;
7484     simde__m128i r;
7485   } test_vec[] = {
7486     { simde_mm_set_epi32( -561103335,  1276754862,  1749589432,   517536789),
7487       simde_mm_set_epi64x(0,  3),
7488       simde_mm_set_epi32(  -70137917,   159594357,   218698679,    64692098) },
7489     { simde_mm_set_epi32( -159892315, -1509631224, -1642880399,  1227124763),
7490       simde_mm_set_epi64x(0, 31),
7491       simde_mm_set_epi32(         -1,          -1,          -1,           0) },
7492     { simde_mm_set_epi32(-1747665335, -1727232090, -1061986990, -1651964431),
7493       simde_mm_set_epi64x(0, 21),
7494       simde_mm_set_epi32(       -834,        -824,        -507,        -788) },
7495     { simde_mm_set_epi32(  -43034101,  1748997429, -1014034292,  -471404994),
7496       simde_mm_set_epi64x(0, 23),
7497       simde_mm_set_epi32(         -6,         208,        -121,         -57) },
7498     { simde_mm_set_epi32(  663988211,   279391652,   930358665,   693100359),
7499       simde_mm_set_epi64x(0, 31),
7500       simde_mm_set_epi32(          0,           0,           0,           0) },
7501     { simde_mm_set_epi32( 1596760027,  -525985264, -1328341949, -1278585249),
7502       simde_mm_set_epi64x(0, 30),
7503       simde_mm_set_epi32(          1,          -1,          -2,          -2) },
7504     { simde_mm_set_epi32( 2099244913,  -668946691, -1425692748,  1445785661),
7505       simde_mm_set_epi64x(0, 19),
7506       simde_mm_set_epi32(       4003,       -1276,       -2720,        2757) },
7507     { simde_mm_set_epi32( -572539662,  1511976084, -2125946535, -1043884202),
7508       simde_mm_set_epi64x(0,  4),
7509       simde_mm_set_epi32(  -35783729,    94498505,  -132871659,   -65242763) }
7510   };
7511 
7512   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7513     simde__m128i r = simde_mm_sra_epi32(test_vec[i].a, test_vec[i].count);
7514     simde_assert_m128i_i32(r, ==, test_vec[i].r);
7515   }
7516 
7517   return 0;
7518 }
7519 
7520 static int
7521 test_simde_mm_srai_epi16(SIMDE_MUNIT_TEST_ARGS) {
7522   simde__m128i a, e, r;
7523 
7524   a = simde_mm_set_epi16(INT16_C( 11440), INT16_C( 15930), INT16_C( -6862), INT16_C(-12095),
7525                          INT16_C(  2973), INT16_C(-25395), INT16_C(-12983), INT16_C(-25536));
7526 
7527   e = simde_mm_set_epi16(INT16_C( 11440), INT16_C( 15930), INT16_C( -6862), INT16_C(-12095),
7528                          INT16_C(  2973), INT16_C(-25395), INT16_C(-12983), INT16_C(-25536));
7529   r = simde_mm_srai_epi16(a, 0);
7530   simde_assert_m128i_i16(r, ==, e);
7531 
7532   e = simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(    -1), INT16_C(    -1),
7533                          INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(    -1));
7534   r = simde_mm_srai_epi16(a, 16);
7535   simde_assert_m128i_i16(r, ==, e);
7536 
7537   e = simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(    -1), INT16_C(    -1),
7538                          INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(    -1));
7539   r = simde_mm_srai_epi16(a, 42);
7540   simde_assert_m128i_i16(r, ==, e);
7541 
7542   e = simde_mm_set_epi16(INT16_C(    89), INT16_C(   124), INT16_C(   -54), INT16_C(   -95),
7543                          INT16_C(    23), INT16_C(  -199), INT16_C(  -102), INT16_C(  -200));
7544   r = simde_mm_srai_epi16(a, 7);
7545   simde_assert_m128i_i16(r, ==, e);
7546 
7547   e = simde_mm_set_epi16(INT16_C(     1), INT16_C(     1), INT16_C(    -1), INT16_C(    -2),
7548                          INT16_C(     0), INT16_C(    -4), INT16_C(    -2), INT16_C(    -4));
7549   r = simde_mm_srai_epi16(a, 13);
7550   simde_assert_m128i_i16(r, ==, e);
7551 
7552   #if 0
7553   e = simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(    -1), INT16_C(    -1),
7554                          INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(    -1));
7555   r = simde_mm_srai_epi16(a, -7);
7556   simde_assert_m128i_i16(r, ==, e);
7557 
7558   e = simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(    -1), INT16_C(    -1),
7559                          INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(    -1));
7560   r = simde_mm_srai_epi16(a, -42);
7561   simde_assert_m128i_i16(r, ==, e);
7562 
7563   e = simde_mm_set_epi16(INT16_C(     0), INT16_C(     0), INT16_C(    -1), INT16_C(    -1),
7564                          INT16_C(     0), INT16_C(    -1), INT16_C(    -1), INT16_C(    -1));
7565   r = simde_mm_srai_epi16(a, 1729);
7566   simde_assert_m128i_i16(r, ==, e);
7567   #endif
7568 
7569   return 0;
7570 }
7571 
7572 static int
7573 test_simde_mm_srai_epi32(SIMDE_MUNIT_TEST_ARGS) {
7574   simde__m128i a, e, r;
7575 
7576   a = simde_mm_set_epi32(INT32_C(-1377123590), INT32_C( 1981969037), INT32_C( 1025592994), INT32_C( 1213959767));
7577 
7578   e = simde_mm_set_epi32(INT32_C(-1377123590), INT32_C( 1981969037), INT32_C( 1025592994), INT32_C( 1213959767));
7579   r = simde_mm_srai_epi32(a, 0);
7580   simde_assert_m128i_i16(r, ==, e);
7581 
7582   e = simde_mm_set_epi32(INT32_C(         -1), INT32_C(          0), INT32_C(          0), INT32_C(          0));
7583   r = simde_mm_srai_epi32(a, 32);
7584   simde_assert_m128i_i16(r, ==, e);
7585 
7586   e = simde_mm_set_epi32(INT32_C(         -1), INT32_C(          0), INT32_C(          0), INT32_C(          0));
7587   r = simde_mm_srai_epi32(a, 42);
7588   simde_assert_m128i_i16(r, ==, e);
7589 
7590   e = simde_mm_set_epi32(INT32_C(  -10758779), INT32_C(   15484133), INT32_C(    8012445), INT32_C(    9484060));
7591   r = simde_mm_srai_epi32(a, 7);
7592   simde_assert_m128i_i16(r, ==, e);
7593 
7594   e = simde_mm_set_epi32(INT32_C(    -168106), INT32_C(     241939), INT32_C(     125194), INT32_C(     148188));
7595   r = simde_mm_srai_epi32(a, 13);
7596   simde_assert_m128i_i16(r, ==, e);
7597 
7598   #if 0
7599   e = simde_mm_set_epi32(INT32_C(         -1), INT32_C(          0), INT32_C(          0), INT32_C(          0));
7600   r = simde_mm_srai_epi32(a, -7);
7601   simde_assert_m128i_i16(r, ==, e);
7602 
7603   e = simde_mm_set_epi32(INT32_C(         -1), INT32_C(          0), INT32_C(          0), INT32_C(          0));
7604   r = simde_mm_srai_epi32(a, -42);
7605   simde_assert_m128i_i16(r, ==, e);
7606 
7607   e = simde_mm_set_epi32(INT32_C(         -1), INT32_C(          0), INT32_C(          0), INT32_C(          0));
7608   r = simde_mm_srai_epi32(a, 1729);
7609   simde_assert_m128i_i16(r, ==, e);
7610   #endif
7611 
7612   return 0;
7613 }
7614 
7615 static int
7616 test_simde_mm_slli_epi16(SIMDE_MUNIT_TEST_ARGS) {
7617   const struct {
7618     simde__m128i a;
7619     simde__m128i r;
7620   } test_vec[8] = {
7621     { simde_mm_set_epi16(INT16_C(-29640), INT16_C( 27486), INT16_C(-30681), INT16_C( 22606),
7622                          INT16_C(-21221), INT16_C(  7042), INT16_C( -7099), INT16_C(-13884)),
7623       simde_mm_set_epi16(INT16_C( 25024), INT16_C( 23280), INT16_C( 16696), INT16_C(-15760),
7624                          INT16_C( 26840), INT16_C( -9200), INT16_C(  8744), INT16_C( 20000)) },
7625     { simde_mm_set_epi16(INT16_C( 15230), INT16_C( 23269), INT16_C(-21546), INT16_C( 15633),
7626                          INT16_C(  9645), INT16_C(-32001), INT16_C( -1446), INT16_C( -7049)),
7627       simde_mm_set_epi16(INT16_C( -9232), INT16_C(-10456), INT16_C( 24240), INT16_C( -6008),
7628                          INT16_C( 11624), INT16_C(  6136), INT16_C(-11568), INT16_C(  9144)) },
7629     { simde_mm_set_epi16(INT16_C( -4964), INT16_C( 29371), INT16_C( -7375), INT16_C(  7185),
7630                          INT16_C(-25257), INT16_C( 29335), INT16_C( 15023), INT16_C( 23258)),
7631       simde_mm_set_epi16(INT16_C( 25824), INT16_C(-27176), INT16_C(  6536), INT16_C( -8056),
7632                          INT16_C( -5448), INT16_C(-27464), INT16_C(-10888), INT16_C(-10544)) },
7633     { simde_mm_set_epi16(INT16_C(-29984), INT16_C(-17481), INT16_C(-31241), INT16_C( 11397),
7634                          INT16_C(  2926), INT16_C(-28904), INT16_C(-20560), INT16_C(-32448)),
7635       simde_mm_set_epi16(INT16_C( 22272), INT16_C( -8776), INT16_C( 12216), INT16_C( 25640),
7636                          INT16_C( 23408), INT16_C( 30912), INT16_C( 32128), INT16_C(  2560)) },
7637     { simde_mm_set_epi16(INT16_C(-18879), INT16_C(  5889), INT16_C(-27972), INT16_C( -4500),
7638                          INT16_C(-12683), INT16_C( 25849), INT16_C( 24809), INT16_C( 26782)),
7639       simde_mm_set_epi16(INT16_C(-19960), INT16_C(-18424), INT16_C(-27168), INT16_C( 29536),
7640                          INT16_C( 29608), INT16_C( 10184), INT16_C(  1864), INT16_C( 17648)) },
7641     { simde_mm_set_epi16(INT16_C(-12553), INT16_C(-22953), INT16_C( 21946), INT16_C( -9017),
7642                          INT16_C(-10462), INT16_C( -7608), INT16_C( 26015), INT16_C(-24893)),
7643       simde_mm_set_epi16(INT16_C( 30648), INT16_C( 12984), INT16_C(-21040), INT16_C( -6600),
7644                          INT16_C(-18160), INT16_C(  4672), INT16_C( 11512), INT16_C( -2536)) },
7645     { simde_mm_set_epi16(INT16_C( 23545), INT16_C(  -728), INT16_C( 17963), INT16_C(-24889),
7646                          INT16_C( 18443), INT16_C( 19433), INT16_C(-18886), INT16_C(-28120)),
7647       simde_mm_set_epi16(INT16_C( -8248), INT16_C( -5824), INT16_C( 12632), INT16_C( -2504),
7648                          INT16_C( 16472), INT16_C( 24392), INT16_C(-20016), INT16_C(-28352)) },
7649     { simde_mm_set_epi16(INT16_C(  1885), INT16_C(-18948), INT16_C(-21057), INT16_C(   636),
7650                          INT16_C( -9667), INT16_C(-20298), INT16_C( 25111), INT16_C( 30554)),
7651       simde_mm_set_epi16(INT16_C( 15080), INT16_C(-20512), INT16_C( 28152), INT16_C(  5088),
7652                          INT16_C(-11800), INT16_C(-31312), INT16_C(  4280), INT16_C(-17712)) }
7653   };
7654 
7655   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7656     simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0));
7657 
7658     simde__m128i r = simde_mm_slli_epi16(test_vec[i].a, 3);
7659     simde_assert_m128i_i16(r, ==, test_vec[i].r);
7660 
7661     r = simde_mm_slli_epi16(test_vec[i].a, 0);
7662     simde_assert_m128i_i16(r, ==, test_vec[i].a);
7663 
7664     r = simde_mm_slli_epi16(test_vec[i].a, 32);
7665     simde_assert_m128i_i16(r, ==, zeros);
7666 
7667     r = simde_mm_slli_epi16(test_vec[i].a, 33);
7668     simde_assert_m128i_i16(r, ==, zeros);
7669   }
7670 
7671   return 0;
7672 }
7673 
7674 static int
7675 test_simde_mm_srli_epi16(SIMDE_MUNIT_TEST_ARGS) {
7676   const struct {
7677     simde__m128i a;
7678     simde__m128i r;
7679   } test_vec[8] = {
7680     { simde_mm_set_epi16(INT16_C(-29640), INT16_C( 27486), INT16_C(-30681), INT16_C( 22606),
7681                          INT16_C(-21221), INT16_C(  7042), INT16_C( -7099), INT16_C(-13884)),
7682       simde_mm_set_epi16(INT16_C(  4487), INT16_C(  3435), INT16_C(  4356), INT16_C(  2825),
7683                          INT16_C(  5539), INT16_C(   880), INT16_C(  7304), INT16_C(  6456)) },
7684     { simde_mm_set_epi16(INT16_C( 15230), INT16_C( 23269), INT16_C(-21546), INT16_C( 15633),
7685                          INT16_C(  9645), INT16_C(-32001), INT16_C( -1446), INT16_C( -7049)),
7686       simde_mm_set_epi16(INT16_C(  1903), INT16_C(  2908), INT16_C(  5498), INT16_C(  1954),
7687                          INT16_C(  1205), INT16_C(  4191), INT16_C(  8011), INT16_C(  7310)) },
7688     { simde_mm_set_epi16(INT16_C( -4964), INT16_C( 29371), INT16_C( -7375), INT16_C(  7185),
7689                          INT16_C(-25257), INT16_C( 29335), INT16_C( 15023), INT16_C( 23258)),
7690       simde_mm_set_epi16(INT16_C(  7571), INT16_C(  3671), INT16_C(  7270), INT16_C(   898),
7691                          INT16_C(  5034), INT16_C(  3666), INT16_C(  1877), INT16_C(  2907)) },
7692     { simde_mm_set_epi16(INT16_C(-29984), INT16_C(-17481), INT16_C(-31241), INT16_C( 11397),
7693                          INT16_C(  2926), INT16_C(-28904), INT16_C(-20560), INT16_C(-32448)),
7694       simde_mm_set_epi16(INT16_C(  4444), INT16_C(  6006), INT16_C(  4286), INT16_C(  1424),
7695                          INT16_C(   365), INT16_C(  4579), INT16_C(  5622), INT16_C(  4136)) },
7696     { simde_mm_set_epi16(INT16_C(-18879), INT16_C(  5889), INT16_C(-27972), INT16_C( -4500),
7697                          INT16_C(-12683), INT16_C( 25849), INT16_C( 24809), INT16_C( 26782)),
7698       simde_mm_set_epi16(INT16_C(  5832), INT16_C(   736), INT16_C(  4695), INT16_C(  7629),
7699                          INT16_C(  6606), INT16_C(  3231), INT16_C(  3101), INT16_C(  3347)) },
7700     { simde_mm_set_epi16(INT16_C(-12553), INT16_C(-22953), INT16_C( 21946), INT16_C( -9017),
7701                          INT16_C(-10462), INT16_C( -7608), INT16_C( 26015), INT16_C(-24893)),
7702       simde_mm_set_epi16(INT16_C(  6622), INT16_C(  5322), INT16_C(  2743), INT16_C(  7064),
7703                          INT16_C(  6884), INT16_C(  7241), INT16_C(  3251), INT16_C(  5080)) },
7704     { simde_mm_set_epi16(INT16_C( 23545), INT16_C(  -728), INT16_C( 17963), INT16_C(-24889),
7705                          INT16_C( 18443), INT16_C( 19433), INT16_C(-18886), INT16_C(-28120)),
7706       simde_mm_set_epi16(INT16_C(  2943), INT16_C(  8101), INT16_C(  2245), INT16_C(  5080),
7707                          INT16_C(  2305), INT16_C(  2429), INT16_C(  5831), INT16_C(  4677)) },
7708     { simde_mm_set_epi16(INT16_C(  1885), INT16_C(-18948), INT16_C(-21057), INT16_C(   636),
7709                          INT16_C( -9667), INT16_C(-20298), INT16_C( 25111), INT16_C( 30554)),
7710       simde_mm_set_epi16(INT16_C(   235), INT16_C(  5823), INT16_C(  5559), INT16_C(    79),
7711                          INT16_C(  6983), INT16_C(  5654), INT16_C(  3138), INT16_C(  3819)) }
7712   };
7713 
7714   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7715     simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0));
7716 
7717     simde__m128i r = simde_mm_srli_epi16(test_vec[i].a, 3);
7718     simde_assert_m128i_i16(r, ==, test_vec[i].r);
7719 
7720     r = simde_mm_srli_epi16(test_vec[i].a, 0);
7721     simde_assert_m128i_i16(r, ==, test_vec[i].a);
7722 
7723     r = simde_mm_srli_epi16(test_vec[i].a, 16);
7724     simde_assert_m128i_i16(r, ==, zeros);
7725 
7726     r = simde_mm_srli_epi16(test_vec[i].a, 17);
7727     simde_assert_m128i_i16(r, ==, zeros);
7728   }
7729 
7730   return 0;
7731 }
7732 
7733 static int
7734 test_simde_mm_slli_epi32(SIMDE_MUNIT_TEST_ARGS) {
7735   const struct {
7736     simde__m128i a;
7737     simde__m128i r;
7738   } test_vec[8] = {
7739     { simde_mm_set_epi32(-1285208672,  1618695439, -1484382898,    97979804),
7740       simde_mm_set_epi32( 1822995456,   258646496,  -255612480, -1159613568) },
7741     { simde_mm_set_epi32( -215274446, -1750972712, -2134111648,  -338295419),
7742       simde_mm_set_epi32( 1701152320,  -196551936,   427904000,  2059448480) },
7743     { simde_mm_set_epi32(  406577052, -1272707531,  -128013424,  1090211344),
7744       simde_mm_set_epi32(  125563776, -2071935328,   198537728,   527024640) },
7745     { simde_mm_set_epi32( -547315834,   386023226,   789460810, -2097507270),
7746       simde_mm_set_epi32( -334237504,  -532158656,  -507057856,  1599244096) },
7747     { simde_mm_set_epi32( 1943314584,   126005183,   199695502, -1431967820),
7748       simde_mm_set_epi32( 2056524544,  -262801440,  2095288768,  1421670016) },
7749     { simde_mm_set_epi32( 1899687789, -1162493730,  1537811436,   825134965),
7750       simde_mm_set_epi32(  660467104,  1454906304,  1965325696,   634515104) },
7751     { simde_mm_set_epi32(-1452393292, -1781210226, -1307434085, -2039047771),
7752       simde_mm_set_epi32(  768054912, -1164152384,  1111782240,  -825019232) },
7753     { simde_mm_set_epi32(-1646930836,   816193989, -1662050152,   347461227),
7754       simde_mm_set_epi32(-1162179200,   348403872, -1645997312, -1766142624) }
7755   };
7756 
7757   static const struct {
7758     const int32_t a[4];
7759     const int32_t r[4];
7760   } test_vec_18[] = {
7761     { {  INT32_C(  2018447505),  INT32_C(  2072485070), -INT32_C(  1063800373),  INT32_C(  1619529499) },
7762       {  INT32_C(  1111752704),  INT32_C(  1933049856), -INT32_C(   953417728),  INT32_C(  1013710848) } },
7763     { {  INT32_C(  1312528525), -INT32_C(  1886008265),  INT32_C(   615191858),  INT32_C(  1445629892) },
7764       {  INT32_C(  1647575040),  INT32_C(   819724288),  INT32_C(  1422393344),  INT32_C(  1058013184) } },
7765     { {  INT32_C(  1842248351), -INT32_C(   504867562),  INT32_C(   564232198),  INT32_C(   495004047) },
7766       { -INT32_C(   360972288),  INT32_C(  1415053312),  INT32_C(     1572864), -INT32_C(  1506017280) } },
7767     { { -INT32_C(   127157055), -INT32_C(  1148780408), -INT32_C(   622906602),  INT32_C(  1630538178) },
7768       { -INT32_C(   217841664),  INT32_C(    35651584), -INT32_C(   866648064),  INT32_C(   654835712) } },
7769     { { -INT32_C(  1714487421),  INT32_C(  1534834260), -INT32_C(   964944842),  INT32_C(   132382278) },
7770       { -INT32_C(    32768000), -INT32_C(   649068544),  INT32_C(  1893203968), -INT32_C(   115867648) } },
7771     { {  INT32_C(  1124093626),  INT32_C(  1711179599),  INT32_C(  2084560314),  INT32_C(  1792897254) },
7772       {  INT32_C(   988282880),  INT32_C(   490471424),  INT32_C(  1994915840), -INT32_C(  1013448704) } },
7773     { { -INT32_C(  1023169681), -INT32_C(  1742832030), -INT32_C(   513893477),  INT32_C(  1407730073) },
7774       { -INT32_C(  1380188160), -INT32_C(   108527616),  INT32_C(  1852571648),  INT32_C(   107216896) } },
7775     { { -INT32_C(   543758192),  INT32_C(   709137520),  INT32_C(  1487373169),  INT32_C(  1656915187) },
7776       { -INT32_C(  1572864000),  INT32_C(  1371537408),  INT32_C(   230948864),  INT32_C(   332136448) } },
7777   };
7778 
7779   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7780     simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0));
7781 
7782     simde__m128i r = simde_mm_slli_epi32(test_vec[i].a, 5);
7783     simde_assert_m128i_i32(r, ==, test_vec[i].r);
7784 
7785     r = simde_mm_slli_epi32(test_vec[i].a, 0);
7786     simde_assert_m128i_i32(r, ==, test_vec[i].a);
7787 
7788     r = simde_mm_slli_epi32(test_vec[i].a, 32);
7789     simde_assert_m128i_i32(r, ==, zeros);
7790 
7791     r = simde_mm_slli_epi32(test_vec[i].a, 33);
7792     simde_assert_m128i_i32(r, ==, zeros);
7793   }
7794 
7795   for (size_t i = 0 ; i < (sizeof(test_vec_18) / sizeof(test_vec_18[0])) ; i++) {
7796     simde__m128i a = simde_x_mm_loadu_epi32(test_vec_18[i].a);
7797     simde__m128i r = simde_mm_slli_epi32(a, 18);
7798     simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec_18[i].r));
7799   }
7800 
7801   return 0;
7802 }
7803 
7804 static int
7805 test_simde_mm_srli_epi32(SIMDE_MUNIT_TEST_ARGS) {
7806   const struct {
7807     simde__m128i a;
7808     simde__m128i r;
7809   } test_vec[8] = {
7810     { simde_mm_set_epi32(   71624167,   617025209,  -286267780, -1151099730),
7811       simde_mm_set_epi32(    2238255,    19282037,   125271859,    98245861) },
7812     { simde_mm_set_epi32(-1660949423,    45505817,  1892774959,  -917815961),
7813       simde_mm_set_epi32(   82313058,     1422056,    59149217,   105535979) },
7814     { simde_mm_set_epi32( 1642659615,  -757986143, -1891097222,   940303240),
7815       simde_mm_set_epi32(   51333112,   110530661,    75120939,    29384476) },
7816     { simde_mm_set_epi32( 1761409447,   115333600,  -589319110, -1530115830),
7817       simde_mm_set_epi32(   55044045,     3604175,   115801505,    86401608) },
7818     { simde_mm_set_epi32( -502944468, -1500485927,    32222499,  1115657749),
7819       simde_mm_set_epi32(  118500713,    87327542,     1006953,    34864304) },
7820     { simde_mm_set_epi32( -545012251,   924477372, -1883097200,  1327167226),
7821       simde_mm_set_epi32(  117186095,    28889917,    75370940,    41473975) },
7822     { simde_mm_set_epi32(  995448668,   377764585, -1462273550,  1306007963),
7823       simde_mm_set_epi32(   31107770,    11805143,    88521679,    40812748) },
7824     { simde_mm_set_epi32( 1991954175,   665906947,  -606406775,  1678465696),
7825       simde_mm_set_epi32(   62248567,    20809592,   115267516,    52452053) }
7826   };
7827 
7828   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7829     simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0));
7830 
7831     simde__m128i r = simde_mm_srli_epi32(test_vec[i].a, 5);
7832     simde_assert_m128i_i32(r, ==, test_vec[i].r);
7833 
7834     r = simde_mm_srli_epi32(test_vec[i].a, 0);
7835     simde_assert_m128i_i32(r, ==, test_vec[i].a);
7836 
7837     r = simde_mm_srli_epi32(test_vec[i].a, 64);
7838     simde_assert_m128i_i32(r, ==, zeros);
7839 
7840     r = simde_mm_srli_epi32(test_vec[i].a, 65);
7841     simde_assert_m128i_i32(r, ==, zeros);
7842   }
7843 
7844   static const struct {
7845     const int32_t a[4];
7846     const int32_t r[4];
7847   } test_vec_18[] = {
7848     { { -INT32_C(  1359328745), -INT32_C(   408445706),  INT32_C(   239121880),  INT32_C(   748205077) },
7849       {  INT32_C(       11198),  INT32_C(       14825),  INT32_C(         912),  INT32_C(        2854) } },
7850     { { -INT32_C(   345859164),  INT32_C(  1010393205),  INT32_C(  1843309992), -INT32_C(   446698290) },
7851       {  INT32_C(       15064),  INT32_C(        3854),  INT32_C(        7031),  INT32_C(       14679) } },
7852     { {  INT32_C(   764631350), -INT32_C(   837534730),  INT32_C(    98325744), -INT32_C(  1405979384) },
7853       {  INT32_C(        2916),  INT32_C(       13189),  INT32_C(         375),  INT32_C(       11020) } },
7854     { { -INT32_C(  2053663728), -INT32_C(  1648176907),  INT32_C(  1275764862), -INT32_C(  1020106099) },
7855       {  INT32_C(        8549),  INT32_C(       10096),  INT32_C(        4866),  INT32_C(       12492) } },
7856     { { -INT32_C(  1175403069), -INT32_C(   259586816),  INT32_C(  1660314713), -INT32_C(   384948007) },
7857       {  INT32_C(       11900),  INT32_C(       15393),  INT32_C(        6333),  INT32_C(       14915) } },
7858     { { -INT32_C(  1318148420), -INT32_C(   196136842),  INT32_C(  1581341137), -INT32_C(  2027850813) },
7859       {  INT32_C(       11355),  INT32_C(       15635),  INT32_C(        6032),  INT32_C(        8648) } },
7860     { {  INT32_C(   960500280),  INT32_C(  1881786391),  INT32_C(    97656620),  INT32_C(    82764103) },
7861       {  INT32_C(        3664),  INT32_C(        7178),  INT32_C(         372),  INT32_C(         315) } },
7862     { { -INT32_C(    38445945),  INT32_C(  1592919181),  INT32_C(   565982046), -INT32_C(   559358554) },
7863       {  INT32_C(       16237),  INT32_C(        6076),  INT32_C(        2159),  INT32_C(       14250) } },
7864   };
7865 
7866   for (size_t i = 0 ; i < (sizeof(test_vec_18) / sizeof(test_vec_18[0])) ; i++) {
7867     simde__m128i a = simde_x_mm_loadu_epi32(test_vec_18[i].a);
7868     simde__m128i r = simde_mm_srli_epi32(a, 18);
7869     simde_test_x86_assert_equal_i32x4(r, simde_x_mm_loadu_epi32(test_vec_18[i].r));
7870   }
7871 
7872   return 0;
7873 }
7874 
7875 static int
7876 test_simde_mm_slli_epi64(SIMDE_MUNIT_TEST_ARGS) {
7877   const struct {
7878     simde__m128i a;
7879     simde__m128i r;
7880   } test_vec[8] = {
7881     { simde_mm_set_epi64x(INT64_C(-2315072815474662386), INT64_C( 6072154117607221746)),
7882       simde_mm_set_epi64x(INT64_C(-1181415201403959552), INT64_C( 2472475957923215616)) },
7883     { simde_mm_set_epi64x(INT64_C(-2150345518249743204), INT64_C( 3180241355952247476)),
7884       simde_mm_set_epi64x(INT64_C( 1456934769676144128), INT64_C( 1242523940277541376)) },
7885     { simde_mm_set_epi64x(INT64_C( 6492638483912689614), INT64_C( 7722522576063149658)),
7886       simde_mm_set_epi64x(INT64_C(  954242623894447872), INT64_C(-7641290244232631040)) },
7887     { simde_mm_set_epi64x(INT64_C(-3437371876454060839), INT64_C(-7472017034411611746)),
7888       simde_mm_set_epi64x(INT64_C( 2738257582909451392), INT64_C( 2812511428210380544)) },
7889     { simde_mm_set_epi64x(INT64_C(-2994435188669454779), INT64_C(-4713226846452985822)),
7890       simde_mm_set_epi64x(INT64_C( 4093921398210372224), INT64_C( 5449518086433018112)) },
7891     { simde_mm_set_epi64x(INT64_C(-1348831542752523511), INT64_C(-2794326256527200530)),
7892       simde_mm_set_epi64x(INT64_C(-6629740808937044864), INT64_C(-7185623435000187136)) },
7893     { simde_mm_set_epi64x(INT64_C(-5602242705933140185), INT64_C( 6492190109232091873)),
7894       simde_mm_set_epi64x(INT64_C( 2335952515230569344), INT64_C(  896850664777937024)) },
7895     { simde_mm_set_epi64x(INT64_C( 8003331601608352009), INT64_C(-5520322068937257120)),
7896       simde_mm_set_epi64x(INT64_C(-8591223121865833344), INT64_C(-5624950023005949952)) }
7897   };
7898 
7899   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7900     simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0));
7901 
7902     simde__m128i r = simde_mm_slli_epi64(test_vec[i].a, 7);
7903     simde_assert_m128i_i32(r, ==, test_vec[i].r);
7904 
7905     r = simde_mm_slli_epi64(test_vec[i].a, 0);
7906     simde_assert_m128i_i32(r, ==, test_vec[i].a);
7907 
7908     r = simde_mm_slli_epi64(test_vec[i].a, 64);
7909     simde_assert_m128i_i32(r, ==, zeros);
7910 
7911     r = simde_mm_slli_epi64(test_vec[i].a, 65);
7912     simde_assert_m128i_i32(r, ==, zeros);
7913   }
7914 
7915   return 0;
7916 }
7917 
7918 static int
7919 test_simde_mm_srli_epi64(SIMDE_MUNIT_TEST_ARGS) {
7920   const struct {
7921     simde__m128i a;
7922     simde__m128i r;
7923   } test_vec[8] = {
7924     { simde_mm_set_epi64x(INT64_C(-2315072815474662386), INT64_C( 6072154117607221746)),
7925       simde_mm_set_epi64x(INT64_C(  126028681704960072), INT64_C(   47438704043806419)) },
7926     { simde_mm_set_epi64x(INT64_C(-2150345518249743204), INT64_C( 3180241355952247476)),
7927       simde_mm_set_epi64x(INT64_C(  127315613714529753), INT64_C(   24845635593376933)) },
7928     { simde_mm_set_epi64x(INT64_C( 6492638483912689614), INT64_C( 7722522576063149658)),
7929       simde_mm_set_epi64x(INT64_C(   50723738155567887), INT64_C(   60332207625493356)) },
7930     { simde_mm_set_epi64x(INT64_C(-3437371876454060839), INT64_C(-7472017034411611746)),
7931       simde_mm_set_epi64x(INT64_C(  117260720291058521), INT64_C(   85740054994515155)) },
7932     { simde_mm_set_epi64x(INT64_C(-2994435188669454779), INT64_C(-4713226846452985822)),
7933       simde_mm_set_epi64x(INT64_C(  120721163164375756), INT64_C(  107293103337941920)) },
7934     { simde_mm_set_epi64x(INT64_C(-1348831542752523511), INT64_C(-2794326256527200530)),
7935       simde_mm_set_epi64x(INT64_C(  133577441648101782), INT64_C(  122284514196737117)) },
7936     { simde_mm_set_epi64x(INT64_C(-5602242705933140185), INT64_C( 6492190109232091873)),
7937       simde_mm_set_epi64x(INT64_C(  100347666935753214), INT64_C(   50720235228375717)) },
7938     { simde_mm_set_epi64x(INT64_C( 8003331601608352009), INT64_C(-5520322068937257120)),
7939       simde_mm_set_epi64x(INT64_C(   62526028137565250), INT64_C(  100987671912283550)) }
7940   };
7941 
7942   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
7943     simde__m128i r;
7944     simde__m128i zeros = simde_mm_set1_epi64x(INT64_C(0));
7945 
7946     /* r = simde_mm_srli_epi64(test_vec[i].a, -1); */
7947     /* simde_assert_m128i_i32(r, ==, zeros); */
7948 
7949     r = simde_mm_srli_epi64(test_vec[i].a, 0);
7950     simde_assert_m128i_i32(r, ==, test_vec[i].a);
7951 
7952     r = simde_mm_srli_epi64(test_vec[i].a, 7);
7953     simde_assert_m128i_i32(r, ==, test_vec[i].r);
7954 
7955     r = simde_mm_srli_epi64(test_vec[i].a, 64);
7956     simde_assert_m128i_i32(r, ==, zeros);
7957 
7958     r = simde_mm_srli_epi64(test_vec[i].a, 65);
7959     simde_assert_m128i_i32(r, ==, zeros);
7960   }
7961 
7962   return 0;
7963 }
7964 
7965 static int
7966 test_simde_mm_store_pd(SIMDE_MUNIT_TEST_ARGS) {
7967   const struct {
7968     simde__m128d a;
7969     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
7970   } test_vec[8] = {
7971     { simde_mm_set_pd(SIMDE_FLOAT64_C(  825.31), SIMDE_FLOAT64_C(  176.75)),
7972       {SIMDE_FLOAT64_C(   176.75), SIMDE_FLOAT64_C(  825.31) } },
7973     { simde_mm_set_pd(SIMDE_FLOAT64_C( -248.58), SIMDE_FLOAT64_C( -171.93)),
7974       {SIMDE_FLOAT64_C(  -171.93), SIMDE_FLOAT64_C( -248.58) } },
7975     { simde_mm_set_pd(SIMDE_FLOAT64_C(  419.77), SIMDE_FLOAT64_C(  712.85)),
7976       {SIMDE_FLOAT64_C(   712.85), SIMDE_FLOAT64_C(  419.77) } },
7977     { simde_mm_set_pd(SIMDE_FLOAT64_C(  785.55), SIMDE_FLOAT64_C(   78.74)),
7978       {SIMDE_FLOAT64_C(    78.74), SIMDE_FLOAT64_C(  785.55) } },
7979     { simde_mm_set_pd(SIMDE_FLOAT64_C( -963.92), SIMDE_FLOAT64_C(  614.28)),
7980       {SIMDE_FLOAT64_C(   614.28), SIMDE_FLOAT64_C( -963.92) } },
7981     { simde_mm_set_pd(SIMDE_FLOAT64_C(  624.62), SIMDE_FLOAT64_C( -260.28)),
7982       {SIMDE_FLOAT64_C(  -260.28), SIMDE_FLOAT64_C(  624.62) } },
7983     { simde_mm_set_pd(SIMDE_FLOAT64_C( -178.24), SIMDE_FLOAT64_C(  945.12)),
7984       {SIMDE_FLOAT64_C(   945.12), SIMDE_FLOAT64_C( -178.24) } },
7985     { simde_mm_set_pd(SIMDE_FLOAT64_C( -271.60), SIMDE_FLOAT64_C( -674.20)),
7986       {SIMDE_FLOAT64_C(  -674.20), SIMDE_FLOAT64_C( -271.60) } }
7987   };
7988 
7989   for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
7990     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
7991     simde_mm_store_pd(r, test_vec[i].a);
7992     simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
7993   }
7994 
7995   return 0;
7996 }
7997 
7998 static int
7999 test_simde_mm_store_pd1(SIMDE_MUNIT_TEST_ARGS) {
8000   const struct {
8001     simde__m128d a;
8002     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8003   } test_vec[8] = {
8004     { simde_mm_set_pd(SIMDE_FLOAT64_C(  278.50), SIMDE_FLOAT64_C(  554.87)),
8005       {SIMDE_FLOAT64_C(   554.87), SIMDE_FLOAT64_C(  554.87) } },
8006     { simde_mm_set_pd(SIMDE_FLOAT64_C( -348.28), SIMDE_FLOAT64_C(  361.13)),
8007       {SIMDE_FLOAT64_C(   361.13), SIMDE_FLOAT64_C(  361.13) } },
8008     { simde_mm_set_pd(SIMDE_FLOAT64_C( -701.38), SIMDE_FLOAT64_C(  708.23)),
8009       {SIMDE_FLOAT64_C(   708.23), SIMDE_FLOAT64_C(  708.23) } },
8010     { simde_mm_set_pd(SIMDE_FLOAT64_C( -362.77), SIMDE_FLOAT64_C( -574.16)),
8011       {SIMDE_FLOAT64_C(  -574.16), SIMDE_FLOAT64_C( -574.16) } },
8012     { simde_mm_set_pd(SIMDE_FLOAT64_C(  420.63), SIMDE_FLOAT64_C(  850.70)),
8013       {SIMDE_FLOAT64_C(   850.70), SIMDE_FLOAT64_C(  850.70) } },
8014     { simde_mm_set_pd(SIMDE_FLOAT64_C( -223.78), SIMDE_FLOAT64_C(  845.58)),
8015       {SIMDE_FLOAT64_C(   845.58), SIMDE_FLOAT64_C(  845.58) } },
8016     { simde_mm_set_pd(SIMDE_FLOAT64_C(  948.70), SIMDE_FLOAT64_C(  544.62)),
8017       {SIMDE_FLOAT64_C(   544.62), SIMDE_FLOAT64_C(  544.62) } },
8018     { simde_mm_set_pd(SIMDE_FLOAT64_C( -216.79), SIMDE_FLOAT64_C( -830.24)),
8019       {SIMDE_FLOAT64_C(  -830.24), SIMDE_FLOAT64_C( -830.24) } }
8020   };
8021 
8022   for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8023     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8024     simde_mm_store_pd1(r, test_vec[i].a);
8025     simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8026   }
8027 
8028   return 0;
8029 }
8030 
8031 static int
8032 test_simde_mm_store_sd(SIMDE_MUNIT_TEST_ARGS) {
8033   const struct {
8034     simde__m128d a;
8035     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 b[2];
8036     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8037   } test_vec[8] = {
8038     { simde_mm_set_pd(SIMDE_FLOAT64_C( -380.32), SIMDE_FLOAT64_C(  589.10)),
8039       {SIMDE_FLOAT64_C(  -886.38), SIMDE_FLOAT64_C(  706.27) },
8040       {SIMDE_FLOAT64_C(   589.10), SIMDE_FLOAT64_C(  706.27) } },
8041     { simde_mm_set_pd(SIMDE_FLOAT64_C(   97.55), SIMDE_FLOAT64_C( -921.93)),
8042       {SIMDE_FLOAT64_C(   175.08), SIMDE_FLOAT64_C( -498.43) },
8043       {SIMDE_FLOAT64_C(  -921.93), SIMDE_FLOAT64_C( -498.43) } },
8044     { simde_mm_set_pd(SIMDE_FLOAT64_C( -962.76), SIMDE_FLOAT64_C( -267.73)),
8045       {SIMDE_FLOAT64_C(  -505.37), SIMDE_FLOAT64_C( -729.92) },
8046       {SIMDE_FLOAT64_C(  -267.73), SIMDE_FLOAT64_C( -729.92) } },
8047     { simde_mm_set_pd(SIMDE_FLOAT64_C(  154.59), SIMDE_FLOAT64_C( -829.83)),
8048       {SIMDE_FLOAT64_C(   141.33), SIMDE_FLOAT64_C(  657.26) },
8049       {SIMDE_FLOAT64_C(  -829.83), SIMDE_FLOAT64_C(  657.26) } },
8050     { simde_mm_set_pd(SIMDE_FLOAT64_C( -623.49), SIMDE_FLOAT64_C( -306.50)),
8051       {SIMDE_FLOAT64_C(  -540.89), SIMDE_FLOAT64_C(  213.61) },
8052       {SIMDE_FLOAT64_C(  -306.50), SIMDE_FLOAT64_C(  213.61) } },
8053     { simde_mm_set_pd(SIMDE_FLOAT64_C(  708.57), SIMDE_FLOAT64_C( -626.05)),
8054       {SIMDE_FLOAT64_C(  -658.64), SIMDE_FLOAT64_C(  310.68) },
8055       {SIMDE_FLOAT64_C(  -626.05), SIMDE_FLOAT64_C(  310.68) } },
8056     { simde_mm_set_pd(SIMDE_FLOAT64_C(    7.10), SIMDE_FLOAT64_C(   84.59)),
8057       {SIMDE_FLOAT64_C(   191.88), SIMDE_FLOAT64_C( -258.06) },
8058       {SIMDE_FLOAT64_C(    84.59), SIMDE_FLOAT64_C( -258.06) } },
8059     { simde_mm_set_pd(SIMDE_FLOAT64_C(  399.99), SIMDE_FLOAT64_C( -337.50)),
8060       {SIMDE_FLOAT64_C(   733.91), SIMDE_FLOAT64_C( -756.49) },
8061       {SIMDE_FLOAT64_C(  -337.50), SIMDE_FLOAT64_C( -756.49) } }
8062   };
8063 
8064   for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8065     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8066     simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b));
8067     simde_mm_store_sd(r, test_vec[i].a);
8068     simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8069   }
8070 
8071   return 0;
8072 }
8073 
8074 static int
8075 test_simde_mm_store_si128(SIMDE_MUNIT_TEST_ARGS) {
8076   const struct {
8077     simde__m128i a;
8078     simde__m128i r;
8079   } test_vec[8] = {
8080     { simde_mm_set_epi32(INT32_C(-1969078312), INT32_C( 1646650233), INT32_C(-1190611301), INT32_C(  889904733)),
8081       simde_mm_set_epi32(INT32_C(-1969078312), INT32_C( 1646650233), INT32_C(-1190611301), INT32_C(  889904733)) },
8082     { simde_mm_set_epi32(INT32_C(  361491951), INT32_C(-1497327260), INT32_C(-2092062445), INT32_C(-1242536811)),
8083       simde_mm_set_epi32(INT32_C(  361491951), INT32_C(-1497327260), INT32_C(-2092062445), INT32_C(-1242536811)) },
8084     { simde_mm_set_epi32(INT32_C(  790325756), INT32_C( -295457696), INT32_C(   30297459), INT32_C(  860807687)),
8085       simde_mm_set_epi32(INT32_C(  790325756), INT32_C( -295457696), INT32_C(   30297459), INT32_C(  860807687)) },
8086     { simde_mm_set_epi32(INT32_C(-1228048681), INT32_C( 1236867704), INT32_C(-1927827785), INT32_C(-1233913343)),
8087       simde_mm_set_epi32(INT32_C(-1228048681), INT32_C( 1236867704), INT32_C(-1927827785), INT32_C(-1233913343)) },
8088     { simde_mm_set_epi32(INT32_C( 1007412231), INT32_C( -296710614), INT32_C(-1416317108), INT32_C( -839008134)),
8089       simde_mm_set_epi32(INT32_C( 1007412231), INT32_C( -296710614), INT32_C(-1416317108), INT32_C( -839008134)) },
8090     { simde_mm_set_epi32(INT32_C( 1325410731), INT32_C( 2049780007), INT32_C(  190337706), INT32_C( 1948643128)),
8091       simde_mm_set_epi32(INT32_C( 1325410731), INT32_C( 2049780007), INT32_C(  190337706), INT32_C( 1948643128)) },
8092     { simde_mm_set_epi32(INT32_C(-1295145224), INT32_C( -913388140), INT32_C(-1185110338), INT32_C(  127220065)),
8093       simde_mm_set_epi32(INT32_C(-1295145224), INT32_C( -913388140), INT32_C(-1185110338), INT32_C(  127220065)) },
8094     { simde_mm_set_epi32(INT32_C(  479405479), INT32_C(  641965302), INT32_C(-1100092667), INT32_C( 1837148945)),
8095       simde_mm_set_epi32(INT32_C(  479405479), INT32_C(  641965302), INT32_C(-1100092667), INT32_C( 1837148945)) }
8096   };
8097 
8098   for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8099     simde__m128i r;
8100     simde_mm_store_si128(&r, test_vec[i].a);
8101     simde_assert_m128i_i32(r, ==, test_vec[i].r);
8102   }
8103 
8104   return 0;
8105 }
8106 
8107 static int
8108 test_simde_mm_storeh_pd(SIMDE_MUNIT_TEST_ARGS) {
8109   const struct {
8110     simde__m128d a;
8111     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 b[2];
8112     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8113   } test_vec[8] = {
8114     { simde_mm_set_pd(SIMDE_FLOAT64_C( -154.79), SIMDE_FLOAT64_C(  689.59)),
8115       {SIMDE_FLOAT64_C(  -986.30), SIMDE_FLOAT64_C( -463.82) },
8116       {SIMDE_FLOAT64_C(  -154.79), SIMDE_FLOAT64_C( -463.82) } },
8117     { simde_mm_set_pd(SIMDE_FLOAT64_C(  944.07), SIMDE_FLOAT64_C( -598.47)),
8118       {SIMDE_FLOAT64_C(  -514.42), SIMDE_FLOAT64_C(  652.02) },
8119       {SIMDE_FLOAT64_C(   944.07), SIMDE_FLOAT64_C(  652.02) } },
8120     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -34.72), SIMDE_FLOAT64_C( -771.52)),
8121       {SIMDE_FLOAT64_C(   343.91), SIMDE_FLOAT64_C( -171.75) },
8122       {SIMDE_FLOAT64_C(   -34.72), SIMDE_FLOAT64_C( -171.75) } },
8123     { simde_mm_set_pd(SIMDE_FLOAT64_C(  305.40), SIMDE_FLOAT64_C( -671.87)),
8124       {SIMDE_FLOAT64_C(  -579.65), SIMDE_FLOAT64_C( -985.37) },
8125       {SIMDE_FLOAT64_C(   305.40), SIMDE_FLOAT64_C( -985.37) } },
8126     { simde_mm_set_pd(SIMDE_FLOAT64_C(  173.51), SIMDE_FLOAT64_C(  643.06)),
8127       {SIMDE_FLOAT64_C(   794.84), SIMDE_FLOAT64_C(  233.08) },
8128       {SIMDE_FLOAT64_C(   173.51), SIMDE_FLOAT64_C(  233.08) } },
8129     { simde_mm_set_pd(SIMDE_FLOAT64_C( -130.21), SIMDE_FLOAT64_C( -290.59)),
8130       {SIMDE_FLOAT64_C(   584.05), SIMDE_FLOAT64_C( -167.57) },
8131       {SIMDE_FLOAT64_C(  -130.21), SIMDE_FLOAT64_C( -167.57) } },
8132     { simde_mm_set_pd(SIMDE_FLOAT64_C( -759.48), SIMDE_FLOAT64_C(  428.70)),
8133       {SIMDE_FLOAT64_C(    36.98), SIMDE_FLOAT64_C( -189.97) },
8134       {SIMDE_FLOAT64_C(  -759.48), SIMDE_FLOAT64_C( -189.97) } },
8135     { simde_mm_set_pd(SIMDE_FLOAT64_C(  222.49), SIMDE_FLOAT64_C(  621.71)),
8136       {SIMDE_FLOAT64_C(  -467.95), SIMDE_FLOAT64_C( -910.73) },
8137       {SIMDE_FLOAT64_C(   222.49), SIMDE_FLOAT64_C( -910.73) } }
8138   };
8139 
8140   for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8141     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8142     simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b));
8143     simde_mm_storeh_pd(r, test_vec[i].a);
8144     simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8145   }
8146 
8147   return 0;
8148 }
8149 
8150 static int
8151 test_simde_mm_storel_epi64(SIMDE_MUNIT_TEST_ARGS) {
8152   const struct {
8153     simde__m128i a;
8154     simde__m128i b;
8155     simde__m128i r;
8156   } test_vec[8] = {
8157     { simde_mm_set_epi64x(INT64_C(-8572402204481175152), INT64_C(-3565447379630862345)),
8158       simde_mm_set_epi64x(INT64_C(-5836787758646654491), INT64_C(  978262207997446536)),
8159       simde_mm_set_epi64x(INT64_C(-5836787758646654491), INT64_C(-3565447379630862345)) },
8160     { simde_mm_set_epi64x(INT64_C(  883894259135204982), INT64_C(-6785295924552521928)),
8161       simde_mm_set_epi64x(INT64_C( 5751908210058630765), INT64_C(-7999305285706001942)),
8162       simde_mm_set_epi64x(INT64_C( 5751908210058630765), INT64_C(-6785295924552521928)) },
8163     { simde_mm_set_epi64x(INT64_C( 4991496111910955453), INT64_C(-1947231678451890517)),
8164       simde_mm_set_epi64x(INT64_C( 1054715717267865334), INT64_C(-5199938312574175167)),
8165       simde_mm_set_epi64x(INT64_C( 1054715717267865334), INT64_C(-1947231678451890517)) },
8166     { simde_mm_set_epi64x(INT64_C(-6916286228894702079), INT64_C(-7888320918323423602)),
8167       simde_mm_set_epi64x(INT64_C(-4560271213984560857), INT64_C( 1030486561279856923)),
8168       simde_mm_set_epi64x(INT64_C(-4560271213984560857), INT64_C(-7888320918323423602)) },
8169     { simde_mm_set_epi64x(INT64_C(-5516402797122916761), INT64_C( 8516393373254709766)),
8170       simde_mm_set_epi64x(INT64_C(-8984432431227422893), INT64_C(-1285772213781786319)),
8171       simde_mm_set_epi64x(INT64_C(-8984432431227422893), INT64_C( 8516393373254709766)) },
8172     { simde_mm_set_epi64x(INT64_C( 1537881028582424966), INT64_C( 3855597324285413517)),
8173       simde_mm_set_epi64x(INT64_C(-1087659369158402202), INT64_C( 5504181592152866903)),
8174       simde_mm_set_epi64x(INT64_C(-1087659369158402202), INT64_C( 3855597324285413517)) },
8175     { simde_mm_set_epi64x(INT64_C(-1003754336566127903), INT64_C( 3155788073225494266)),
8176       simde_mm_set_epi64x(INT64_C( 7014294951579480267), INT64_C(-6777837266490471507)),
8177       simde_mm_set_epi64x(INT64_C( 7014294951579480267), INT64_C( 3155788073225494266)) },
8178     { simde_mm_set_epi64x(INT64_C( 7343239871058385173), INT64_C(-8089093160963830084)),
8179       simde_mm_set_epi64x(INT64_C(-7180996141698966448), INT64_C( 1747758344108352756)),
8180       simde_mm_set_epi64x(INT64_C(-7180996141698966448), INT64_C(-8089093160963830084)) }
8181   };
8182 
8183   for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8184     simde__m128i r;
8185     simde_memcpy(&r, &(test_vec[i].b), sizeof(r));
8186     simde_mm_storel_epi64(&r, test_vec[i].a);
8187     simde_assert_m128i_i64(r, ==, test_vec[i].r);
8188   }
8189 
8190   return 0;
8191 }
8192 
8193 static int
8194 test_simde_mm_storel_pd(SIMDE_MUNIT_TEST_ARGS) {
8195   const struct {
8196     simde__m128d a;
8197     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 b[2];
8198     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8199   } test_vec[8] = {
8200     { simde_mm_set_pd(SIMDE_FLOAT64_C( -887.08), SIMDE_FLOAT64_C( -520.70)),
8201       {SIMDE_FLOAT64_C(  -258.49), SIMDE_FLOAT64_C(  913.00) },
8202       {SIMDE_FLOAT64_C(  -520.70), SIMDE_FLOAT64_C(  913.00) } },
8203     { simde_mm_set_pd(SIMDE_FLOAT64_C(  724.04), SIMDE_FLOAT64_C( -774.49)),
8204       {SIMDE_FLOAT64_C(   557.37), SIMDE_FLOAT64_C( -701.13) },
8205       {SIMDE_FLOAT64_C(  -774.49), SIMDE_FLOAT64_C( -701.13) } },
8206     { simde_mm_set_pd(SIMDE_FLOAT64_C( -366.90), SIMDE_FLOAT64_C( -168.25)),
8207       {SIMDE_FLOAT64_C(   485.14), SIMDE_FLOAT64_C(  500.94) },
8208       {SIMDE_FLOAT64_C(  -168.25), SIMDE_FLOAT64_C(  500.94) } },
8209     { simde_mm_set_pd(SIMDE_FLOAT64_C( -783.51), SIMDE_FLOAT64_C( -187.73)),
8210       {SIMDE_FLOAT64_C(  -391.92), SIMDE_FLOAT64_C( -506.74) },
8211       {SIMDE_FLOAT64_C(  -187.73), SIMDE_FLOAT64_C( -506.74) } },
8212     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -50.27), SIMDE_FLOAT64_C( -405.84)),
8213       {SIMDE_FLOAT64_C(  -733.12), SIMDE_FLOAT64_C( -697.37) },
8214       {SIMDE_FLOAT64_C(  -405.84), SIMDE_FLOAT64_C( -697.37) } },
8215     { simde_mm_set_pd(SIMDE_FLOAT64_C( -773.31), SIMDE_FLOAT64_C( -470.65)),
8216       {SIMDE_FLOAT64_C(   738.01), SIMDE_FLOAT64_C( -908.23) },
8217       {SIMDE_FLOAT64_C(  -470.65), SIMDE_FLOAT64_C( -908.23) } },
8218     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -43.07), SIMDE_FLOAT64_C( -143.29)),
8219       {SIMDE_FLOAT64_C(   985.95), SIMDE_FLOAT64_C(   19.70) },
8220       {SIMDE_FLOAT64_C(  -143.29), SIMDE_FLOAT64_C(   19.70) } },
8221     { simde_mm_set_pd(SIMDE_FLOAT64_C(  649.59), SIMDE_FLOAT64_C( -925.70)),
8222       {SIMDE_FLOAT64_C(   519.96), SIMDE_FLOAT64_C(  348.23) },
8223       {SIMDE_FLOAT64_C(  -925.70), SIMDE_FLOAT64_C(  348.23) } }
8224   };
8225 
8226   for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8227     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8228     simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b));
8229     simde_mm_storel_pd(r, test_vec[i].a);
8230     simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8231   }
8232 
8233   return 0;
8234 }
8235 
8236 static int
8237 test_simde_mm_storer_pd(SIMDE_MUNIT_TEST_ARGS) {
8238   const struct {
8239     simde__m128d a;
8240     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 b[2];
8241     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8242   } test_vec[8] = {
8243     { simde_mm_set_pd(SIMDE_FLOAT64_C(  765.90), SIMDE_FLOAT64_C( -392.20)),
8244       {SIMDE_FLOAT64_C(  -898.96), SIMDE_FLOAT64_C(  810.87) },
8245       {SIMDE_FLOAT64_C(   765.90), SIMDE_FLOAT64_C( -392.20) } },
8246     { simde_mm_set_pd(SIMDE_FLOAT64_C(  848.80), SIMDE_FLOAT64_C(  -20.45)),
8247       {SIMDE_FLOAT64_C(  -298.33), SIMDE_FLOAT64_C(  199.86) },
8248       {SIMDE_FLOAT64_C(   848.80), SIMDE_FLOAT64_C(  -20.45) } },
8249     { simde_mm_set_pd(SIMDE_FLOAT64_C( -770.38), SIMDE_FLOAT64_C(   73.29)),
8250       {SIMDE_FLOAT64_C(  -471.45), SIMDE_FLOAT64_C(   85.53) },
8251       {SIMDE_FLOAT64_C(  -770.38), SIMDE_FLOAT64_C(   73.29) } },
8252     { simde_mm_set_pd(SIMDE_FLOAT64_C(  181.32), SIMDE_FLOAT64_C( -528.68)),
8253       {SIMDE_FLOAT64_C(   925.12), SIMDE_FLOAT64_C(  -79.25) },
8254       {SIMDE_FLOAT64_C(   181.32), SIMDE_FLOAT64_C( -528.68) } },
8255     { simde_mm_set_pd(SIMDE_FLOAT64_C(  786.51), SIMDE_FLOAT64_C( -396.45)),
8256       {SIMDE_FLOAT64_C(  -196.75), SIMDE_FLOAT64_C( -493.37) },
8257       {SIMDE_FLOAT64_C(   786.51), SIMDE_FLOAT64_C( -396.45) } },
8258     { simde_mm_set_pd(SIMDE_FLOAT64_C(  379.82), SIMDE_FLOAT64_C( -482.63)),
8259       {SIMDE_FLOAT64_C(   356.61), SIMDE_FLOAT64_C(    6.76) },
8260       {SIMDE_FLOAT64_C(   379.82), SIMDE_FLOAT64_C( -482.63) } },
8261     { simde_mm_set_pd(SIMDE_FLOAT64_C( -597.31), SIMDE_FLOAT64_C( -427.66)),
8262       {SIMDE_FLOAT64_C(  -787.49), SIMDE_FLOAT64_C(  322.82) },
8263       {SIMDE_FLOAT64_C(  -597.31), SIMDE_FLOAT64_C( -427.66) } },
8264     { simde_mm_set_pd(SIMDE_FLOAT64_C(  515.42), SIMDE_FLOAT64_C(  801.05)),
8265       {SIMDE_FLOAT64_C(  -892.50), SIMDE_FLOAT64_C(  794.29) },
8266       {SIMDE_FLOAT64_C(   515.42), SIMDE_FLOAT64_C(  801.05) } }
8267   };
8268 
8269   for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8270     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8271     simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b));
8272     simde_mm_storer_pd(r, test_vec[i].a);
8273     simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8274   }
8275 
8276   return 0;
8277 }
8278 
8279 static int
8280 test_simde_mm_storeu_pd(SIMDE_MUNIT_TEST_ARGS) {
8281   const struct {
8282     simde__m128d a;
8283     simde_float64 b[2];
8284     simde_float64 r[2];
8285   } test_vec[8] = {
8286     { simde_mm_set_pd(SIMDE_FLOAT64_C( -787.29), SIMDE_FLOAT64_C(  410.40)),
8287       {SIMDE_FLOAT64_C(   579.61), SIMDE_FLOAT64_C( -320.32) },
8288       {SIMDE_FLOAT64_C(   410.40), SIMDE_FLOAT64_C( -787.29) } },
8289     { simde_mm_set_pd(SIMDE_FLOAT64_C(  944.41), SIMDE_FLOAT64_C( -149.27)),
8290       {SIMDE_FLOAT64_C(   850.87), SIMDE_FLOAT64_C( -993.24) },
8291       {SIMDE_FLOAT64_C(  -149.27), SIMDE_FLOAT64_C(  944.41) } },
8292     { simde_mm_set_pd(SIMDE_FLOAT64_C( -415.98), SIMDE_FLOAT64_C( -916.88)),
8293       {SIMDE_FLOAT64_C(   966.39), SIMDE_FLOAT64_C( -183.52) },
8294       {SIMDE_FLOAT64_C(  -916.88), SIMDE_FLOAT64_C( -415.98) } },
8295     { simde_mm_set_pd(SIMDE_FLOAT64_C(  431.98), SIMDE_FLOAT64_C( -691.20)),
8296       {SIMDE_FLOAT64_C(  -659.73), SIMDE_FLOAT64_C(  -34.04) },
8297       {SIMDE_FLOAT64_C(  -691.20), SIMDE_FLOAT64_C(  431.98) } },
8298     { simde_mm_set_pd(SIMDE_FLOAT64_C( -483.35), SIMDE_FLOAT64_C(  766.13)),
8299       {SIMDE_FLOAT64_C(  -638.61), SIMDE_FLOAT64_C(  157.38) },
8300       {SIMDE_FLOAT64_C(   766.13), SIMDE_FLOAT64_C( -483.35) } },
8301     { simde_mm_set_pd(SIMDE_FLOAT64_C(  386.12), SIMDE_FLOAT64_C(  330.08)),
8302       {SIMDE_FLOAT64_C(   588.80), SIMDE_FLOAT64_C( -111.35) },
8303       {SIMDE_FLOAT64_C(   330.08), SIMDE_FLOAT64_C(  386.12) } },
8304     { simde_mm_set_pd(SIMDE_FLOAT64_C(   45.12), SIMDE_FLOAT64_C(  964.86)),
8305       {SIMDE_FLOAT64_C(   199.95), SIMDE_FLOAT64_C(  998.07) },
8306       {SIMDE_FLOAT64_C(   964.86), SIMDE_FLOAT64_C(   45.12) } },
8307     { simde_mm_set_pd(SIMDE_FLOAT64_C(  -30.76), SIMDE_FLOAT64_C( -723.78)),
8308       {SIMDE_FLOAT64_C(    -8.78), SIMDE_FLOAT64_C(  410.81) },
8309       {SIMDE_FLOAT64_C(  -723.78), SIMDE_FLOAT64_C(  -30.76) } }
8310   };
8311 
8312   for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8313     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8314     simde_memcpy(r, &(test_vec[i].b), sizeof(test_vec[i].b));
8315     simde_mm_storeu_pd(r, test_vec[i].a);
8316     simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8317   }
8318 
8319   return 0;
8320 }
8321 
8322 static int
8323 test_simde_mm_storeu_si128(SIMDE_MUNIT_TEST_ARGS) {
8324   const struct {
8325     simde__m128i a;
8326     simde__m128i r;
8327   } test_vec[8] = {
8328     { simde_mm_set_epi32(INT32_C(  559775826), INT32_C( -953548695), INT32_C(  811731668), INT32_C(  717258119)),
8329       simde_mm_set_epi32(INT32_C(  559775826), INT32_C( -953548695), INT32_C(  811731668), INT32_C(  717258119)) },
8330     { simde_mm_set_epi32(INT32_C( -819991397), INT32_C(-1367017296), INT32_C( 1998597245), INT32_C( -194600747)),
8331       simde_mm_set_epi32(INT32_C( -819991397), INT32_C(-1367017296), INT32_C( 1998597245), INT32_C( -194600747)) },
8332     { simde_mm_set_epi32(INT32_C(-1983970353), INT32_C( 1036245224), INT32_C( 1208146280), INT32_C( 2086212378)),
8333       simde_mm_set_epi32(INT32_C(-1983970353), INT32_C( 1036245224), INT32_C( 1208146280), INT32_C( 2086212378)) },
8334     { simde_mm_set_epi32(INT32_C(-1115487208), INT32_C( 1901412157), INT32_C( -373768038), INT32_C( 1379732008)),
8335       simde_mm_set_epi32(INT32_C(-1115487208), INT32_C( 1901412157), INT32_C( -373768038), INT32_C( 1379732008)) },
8336     { simde_mm_set_epi32(INT32_C( -772363216), INT32_C( 1208166493), INT32_C( 2006133231), INT32_C( -567476934)),
8337       simde_mm_set_epi32(INT32_C( -772363216), INT32_C( 1208166493), INT32_C( 2006133231), INT32_C( -567476934)) },
8338     { simde_mm_set_epi32(INT32_C( -117502444), INT32_C(  175751722), INT32_C(-1353399970), INT32_C( -281466966)),
8339       simde_mm_set_epi32(INT32_C( -117502444), INT32_C(  175751722), INT32_C(-1353399970), INT32_C( -281466966)) },
8340     { simde_mm_set_epi32(INT32_C( 2118723593), INT32_C(-1657083210), INT32_C( 1907402314), INT32_C(  669913338)),
8341       simde_mm_set_epi32(INT32_C( 2118723593), INT32_C(-1657083210), INT32_C( 1907402314), INT32_C(  669913338)) },
8342     { simde_mm_set_epi32(INT32_C(  372135232), INT32_C( 1779530333), INT32_C(-1088754891), INT32_C( 1773872281)),
8343       simde_mm_set_epi32(INT32_C(  372135232), INT32_C( 1779530333), INT32_C(-1088754891), INT32_C( 1773872281)) }
8344   };
8345 
8346   for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8347     simde__m128i r;
8348     simde_mm_storeu_si128(&r, test_vec[i].a);
8349     simde_assert_m128i_i64(r, ==, test_vec[i].r);
8350   }
8351 
8352   return 0;
8353 }
8354 
8355 static int
8356 test_simde_mm_storeu_si16 (SIMDE_MUNIT_TEST_ARGS) {
8357   static const struct {
8358     const int16_t a[8];
8359     const int16_t r;
8360   } test_vec[] = {
8361     { { -INT16_C(  6988),  INT16_C( 26153), -INT16_C(  2289), -INT16_C(  8118),  INT16_C( 29038),  INT16_C(  9460), -INT16_C( 25910), -INT16_C(  2208) },
8362       -INT16_C(  6988) },
8363     { {  INT16_C(  4717), -INT16_C( 14623),  INT16_C( 14017), -INT16_C( 21548),  INT16_C( 18960), -INT16_C( 20965),  INT16_C( 24253),  INT16_C( 29067) },
8364        INT16_C(  4717) },
8365     { { -INT16_C( 19389),  INT16_C( 21207),  INT16_C(  8619),  INT16_C(  6450),  INT16_C(  9874),  INT16_C( 23869), -INT16_C( 25152),  INT16_C( 11604) },
8366       -INT16_C( 19389) },
8367     { {  INT16_C( 14000),  INT16_C( 29171), -INT16_C( 14484),  INT16_C( 31772),  INT16_C( 14353), -INT16_C( 12758), -INT16_C( 19050), -INT16_C(  9920) },
8368        INT16_C( 14000) },
8369     { {  INT16_C(  5993),  INT16_C(  5163),  INT16_C( 23865), -INT16_C( 13523),  INT16_C( 27523),  INT16_C( 17448),  INT16_C( 32008), -INT16_C( 18319) },
8370        INT16_C(  5993) },
8371     { {  INT16_C( 26035),  INT16_C(  7977),  INT16_C( 17964),  INT16_C( 16027), -INT16_C( 14722),  INT16_C(  5132),  INT16_C( 19579), -INT16_C(  6674) },
8372        INT16_C( 26035) },
8373     { {  INT16_C(  6500), -INT16_C( 25095),  INT16_C( 10103), -INT16_C(  1432), -INT16_C( 28270), -INT16_C( 26050), -INT16_C( 20466), -INT16_C( 16045) },
8374        INT16_C(  6500) },
8375     { {  INT16_C( 31765),  INT16_C( 16864),  INT16_C( 31682),  INT16_C( 16511), -INT16_C( 29631), -INT16_C( 17067),  INT16_C( 17368),  INT16_C( 15522) },
8376        INT16_C( 31765) }
8377   };
8378 
8379   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
8380     simde__m128i a = simde_x_mm_loadu_epi16(test_vec[i].a);
8381     int16_t r;
8382     HEDLEY_CONCAT(simde,_mm_storeu_si16)(&r, a);
8383     simde_assert_equal_i16(r, test_vec[i].r);
8384   }
8385 
8386   return 0;
8387 }
8388 
8389 static int
8390 test_simde_mm_storeu_si32 (SIMDE_MUNIT_TEST_ARGS) {
8391   static const struct {
8392     const int32_t a[4];
8393     const int32_t r;
8394   } test_vec[] = {
8395     { { -INT32_C(   630341273), -INT32_C(   601100258),  INT32_C(   527009452),  INT32_C(   382213470) },
8396       -INT32_C(   630341273) },
8397     { {  INT32_C(   733254901),  INT32_C(   225181130), -INT32_C(   418546734), -INT32_C(  1459105470) },
8398        INT32_C(   733254901) },
8399     { { -INT32_C(  1333562222),  INT32_C(   277655396), -INT32_C(  1825508043),  INT32_C(   145356818) },
8400       -INT32_C(  1333562222) },
8401     { {  INT32_C(  1446207116),  INT32_C(   761503323),  INT32_C(  1544843545), -INT32_C(   721085374) },
8402        INT32_C(  1446207116) },
8403     { { -INT32_C(   175797872),  INT32_C(  1829048888),  INT32_C(   436286727), -INT32_C(  1188910547) },
8404       -INT32_C(   175797872) },
8405     { {  INT32_C(  1661949192),  INT32_C(   227570676),  INT32_C(   644457956),  INT32_C(  1375432641) },
8406        INT32_C(  1661949192) },
8407     { {  INT32_C(   809927160), -INT32_C(  1700967277), -INT32_C(  1347117439),  INT32_C(  1365825097) },
8408        INT32_C(   809927160) },
8409     { {  INT32_C(   548763692), -INT32_C(   819116565), -INT32_C(  1409968150), -INT32_C(    16912122) },
8410        INT32_C(   548763692) }
8411   };
8412 
8413   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
8414     simde__m128i a = simde_x_mm_loadu_epi32(test_vec[i].a);
8415     int32_t r;
8416     HEDLEY_CONCAT(simde,_mm_storeu_si32)(&r, a);
8417     simde_assert_equal_i32(r, test_vec[i].r);
8418   }
8419 
8420   return 0;
8421 }
8422 
8423 static int
8424 test_simde_mm_storeu_si64 (SIMDE_MUNIT_TEST_ARGS) {
8425   static const struct {
8426     const int64_t a[2];
8427     const int64_t r;
8428   } test_vec[] = {
8429     { {  INT64_C( 1269957435069449074),  INT64_C( 6198123151038108778) },
8430        INT64_C( 1269957435069449074) },
8431     { { -INT64_C( 1631810497504953952), -INT64_C( 5530541008416845765) },
8432       -INT64_C( 1631810497504953952) },
8433     { { -INT64_C( 6740103892576997931), -INT64_C(   59573331693324629) },
8434       -INT64_C( 6740103892576997931) },
8435     { { -INT64_C( 9008073061231320301), -INT64_C(  564917926918647499) },
8436       -INT64_C( 9008073061231320301) },
8437     { { -INT64_C( 1996551244505816721),  INT64_C(  965994603972566793) },
8438       -INT64_C( 1996551244505816721) },
8439     { {  INT64_C(  815745091936186761), -INT64_C( 8734544458042763860) },
8440        INT64_C(  815745091936186761) },
8441     { {  INT64_C(  191535998296794507), -INT64_C( 3305974968983330281) },
8442        INT64_C(  191535998296794507) },
8443     { {  INT64_C(  407001106525339075),  INT64_C( 6676759969134880266) },
8444        INT64_C(  407001106525339075) }
8445   };
8446 
8447   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
8448     simde__m128i a = simde_x_mm_loadu_epi64(test_vec[i].a);
8449     int64_t r;
8450     HEDLEY_CONCAT(simde,_mm_storeu_si64)(&r, a);
8451     simde_assert_equal_i64(r, test_vec[i].r);
8452   }
8453 
8454   return 0;
8455 }
8456 
8457 static int
8458 test_simde_mm_store1_pd(SIMDE_MUNIT_TEST_ARGS) {
8459   const struct {
8460     simde__m128d a;
8461     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8462   } test_vec[8] = {
8463     { simde_mm_set_pd(SIMDE_FLOAT64_C(  291.96), SIMDE_FLOAT64_C(  -70.45)),
8464       { SIMDE_FLOAT64_C(  -70.45), SIMDE_FLOAT64_C(  -70.45) } },
8465     { simde_mm_set_pd(SIMDE_FLOAT64_C(  896.84), SIMDE_FLOAT64_C(  840.00)),
8466       { SIMDE_FLOAT64_C(  840.00), SIMDE_FLOAT64_C(  840.00) } },
8467     { simde_mm_set_pd(SIMDE_FLOAT64_C(  792.86), SIMDE_FLOAT64_C(  559.02)),
8468       { SIMDE_FLOAT64_C(  559.02), SIMDE_FLOAT64_C(  559.02) } },
8469     { simde_mm_set_pd(SIMDE_FLOAT64_C(  401.40), SIMDE_FLOAT64_C( -245.84)),
8470       { SIMDE_FLOAT64_C( -245.84), SIMDE_FLOAT64_C( -245.84) } },
8471     { simde_mm_set_pd(SIMDE_FLOAT64_C(  441.21), SIMDE_FLOAT64_C(  731.20)),
8472       { SIMDE_FLOAT64_C(  731.20), SIMDE_FLOAT64_C(  731.20) } },
8473     { simde_mm_set_pd(SIMDE_FLOAT64_C(  569.26), SIMDE_FLOAT64_C( -434.33)),
8474       { SIMDE_FLOAT64_C( -434.33), SIMDE_FLOAT64_C( -434.33) } },
8475     { simde_mm_set_pd(SIMDE_FLOAT64_C( -796.24), SIMDE_FLOAT64_C(  534.91)),
8476       { SIMDE_FLOAT64_C(  534.91), SIMDE_FLOAT64_C(  534.91) } },
8477     { simde_mm_set_pd(SIMDE_FLOAT64_C( -221.59), SIMDE_FLOAT64_C( -372.35)),
8478       { SIMDE_FLOAT64_C( -372.35), SIMDE_FLOAT64_C( -372.35) } },
8479   };
8480 
8481   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8482     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2] ;
8483     simde_mm_store1_pd(r, test_vec[i].a);
8484     simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 4);
8485   }
8486 
8487   return 0;
8488 }
8489 
8490 static int
8491 test_simde_mm_stream_pd(SIMDE_MUNIT_TEST_ARGS) {
8492   const struct {
8493     simde__m128d a;
8494     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8495   } test_vec[8] = {
8496     { simde_mm_set_pd(SIMDE_FLOAT64_C( -749.31), SIMDE_FLOAT64_C( -483.97)),
8497       {SIMDE_FLOAT64_C(  -483.97), SIMDE_FLOAT64_C( -749.31) } },
8498     { simde_mm_set_pd(SIMDE_FLOAT64_C(  587.52), SIMDE_FLOAT64_C( -903.15)),
8499       {SIMDE_FLOAT64_C(  -903.15), SIMDE_FLOAT64_C(  587.52) } },
8500     { simde_mm_set_pd(SIMDE_FLOAT64_C( -515.61), SIMDE_FLOAT64_C(  144.37)),
8501       {SIMDE_FLOAT64_C(   144.37), SIMDE_FLOAT64_C( -515.61) } },
8502     { simde_mm_set_pd(SIMDE_FLOAT64_C( -183.60), SIMDE_FLOAT64_C(  483.36)),
8503       {SIMDE_FLOAT64_C(   483.36), SIMDE_FLOAT64_C( -183.60) } },
8504     { simde_mm_set_pd(SIMDE_FLOAT64_C(   33.37), SIMDE_FLOAT64_C( -802.26)),
8505       {SIMDE_FLOAT64_C(  -802.26), SIMDE_FLOAT64_C(   33.37) } },
8506     { simde_mm_set_pd(SIMDE_FLOAT64_C( -131.42), SIMDE_FLOAT64_C( -156.48)),
8507       {SIMDE_FLOAT64_C(  -156.48), SIMDE_FLOAT64_C( -131.42) } },
8508     { simde_mm_set_pd(SIMDE_FLOAT64_C( -317.87), SIMDE_FLOAT64_C(  140.87)),
8509       {SIMDE_FLOAT64_C(   140.87), SIMDE_FLOAT64_C( -317.87) } },
8510     { simde_mm_set_pd(SIMDE_FLOAT64_C(  924.07), SIMDE_FLOAT64_C(  709.42)),
8511       {SIMDE_FLOAT64_C(   709.42), SIMDE_FLOAT64_C(  924.07) } }
8512   };
8513 
8514   for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
8515     SIMDE_ALIGN_LIKE_16(simde__m128d) simde_float64 r[2];
8516     simde_mm_stream_pd(r, test_vec[i].a);
8517     simde_assert_equal_vf64(sizeof(r) / sizeof(r[0]), r, test_vec[i].r, 1);
8518   }
8519 
8520   return 0;
8521 }
8522 
8523 static int
8524 test_simde_mm_stream_si128(SIMDE_MUNIT_TEST_ARGS) {
8525   const struct {
8526     simde__m128i a;
8527     simde__m128i r;
8528   } test_vec[8] = {
8529     { simde_mm_set_epi32(INT32_C(   34091183), INT32_C(  572850908), INT32_C(  428781754), INT32_C(-1984722387)),
8530       simde_mm_set_epi32(INT32_C(   34091183), INT32_C(  572850908), INT32_C(  428781754), INT32_C(-1984722387)) },
8531     { simde_mm_set_epi32(INT32_C( 2059236852), INT32_C(  436410728), INT32_C(  338757718), INT32_C( 1985336145)),
8532       simde_mm_set_epi32(INT32_C( 2059236852), INT32_C(  436410728), INT32_C(  338757718), INT32_C( 1985336145)) },
8533     { simde_mm_set_epi32(INT32_C( -559686487), INT32_C(  981390363), INT32_C(  629822759), INT32_C(   26629572)),
8534       simde_mm_set_epi32(INT32_C( -559686487), INT32_C(  981390363), INT32_C(  629822759), INT32_C(   26629572)) },
8535     { simde_mm_set_epi32(INT32_C( 1401959784), INT32_C( -900492538), INT32_C( -328421218), INT32_C(  452144845)),
8536       simde_mm_set_epi32(INT32_C( 1401959784), INT32_C( -900492538), INT32_C( -328421218), INT32_C(  452144845)) },
8537     { simde_mm_set_epi32(INT32_C( 1914664610), INT32_C( 1467736241), INT32_C(-2062482935), INT32_C(-1765775255)),
8538       simde_mm_set_epi32(INT32_C( 1914664610), INT32_C( 1467736241), INT32_C(-2062482935), INT32_C(-1765775255)) },
8539     { simde_mm_set_epi32(INT32_C(  659730578), INT32_C(  874862437), INT32_C( -487086426), INT32_C(-1161523548)),
8540       simde_mm_set_epi32(INT32_C(  659730578), INT32_C(  874862437), INT32_C( -487086426), INT32_C(-1161523548)) },
8541     { simde_mm_set_epi32(INT32_C( 1195652072), INT32_C( -415424127), INT32_C(   77100736), INT32_C( 1699618155)),
8542       simde_mm_set_epi32(INT32_C( 1195652072), INT32_C( -415424127), INT32_C(   77100736), INT32_C( 1699618155)) },
8543     { simde_mm_set_epi32(INT32_C( 1626943139), INT32_C( 1327578602), INT32_C(-1477047999), INT32_C( 1569415359)),
8544       simde_mm_set_epi32(INT32_C( 1626943139), INT32_C( 1327578602), INT32_C(-1477047999), INT32_C( 1569415359)) }
8545   };
8546 
8547   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8548     simde__m128i r;
8549     simde_mm_stream_si128(&r, test_vec[i].a);
8550     simde_assert_m128i_i32(r, ==, test_vec[i].r);
8551   }
8552 
8553   return 0;
8554 }
8555 
/* simde_mm_stream_si32: stores a scalar int32_t to memory and expects the
 * destination to hold the same value afterwards. */
static int
test_simde_mm_stream_si32(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    int32_t a;  /* value handed to the store */
    int32_t r;  /* expected stored value */
  } test_vec[8] = {
    { INT32_C(-895547977),  INT32_C(-895547977) },
    { INT32_C(1712937231),  INT32_C(1712937231) },
    { INT32_C(-1086654689), INT32_C(-1086654689) },
    { INT32_C(1855506850),  INT32_C(1855506850) },
    { INT32_C(1870001810),  INT32_C(1870001810) },
    { INT32_C(-396094407),  INT32_C(-396094407) },
    { INT32_C(-1262223993), INT32_C(-1262223993) },
    { INT32_C(2015532253),  INT32_C(2015532253) }
  };
  const size_t n_tests = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_tests ; ++i) {
    int32_t r;  /* destination of the store */

    simde_mm_stream_si32(&r, test_vec[i].a);
    simde_assert_equal_i32(r, test_vec[i].r);
  }

  return 0;
}
8580 
/* simde_mm_stream_si64: stores a scalar int64_t to memory and expects the
 * destination to hold the same value afterwards. */
static int
test_simde_mm_stream_si64(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    int64_t a;  /* value handed to the store */
    int64_t r;  /* expected stored value */
  } test_vec[8] = {
    { INT64_C(-908741869362791955),  INT64_C(-908741869362791955) },
    { INT64_C(6977779886002528513),  INT64_C(6977779886002528513) },
    { INT64_C(-3803748866185605675), INT64_C(-3803748866185605675) },
    { INT64_C(9126491633461219066),  INT64_C(9126491633461219066) },
    { INT64_C(-1680016917440909978), INT64_C(-1680016917440909978) },
    { INT64_C(9194247506078439345),  INT64_C(9194247506078439345) },
    { INT64_C(-5911248664473270680), INT64_C(-5911248664473270680) },
    { INT64_C(-9131883318362768052), INT64_C(-9131883318362768052) }
  };
  const size_t n_tests = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_tests ; ++i) {
    int64_t r;  /* destination of the store */

    /* NOTE(review): when both SIMDE_X86_SSE2_NATIVE and
     * SIMDE_NATIVE_ALIASES_TESTING are defined the destination pointer is cast
     * to long long int*, presumably to match the native prototype -- confirm
     * against the target headers. */
    #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_NATIVE_ALIASES_TESTING)
      simde_mm_stream_si64(HEDLEY_REINTERPRET_CAST(long long int*, &r), test_vec[i].a);
    #else
      simde_mm_stream_si64(&r, test_vec[i].a);
    #endif

    simde_assert_equal_i64(r, test_vec[i].r);
  }

  return 0;
}
8611 
8612 static int
8613 test_simde_mm_sub_epi8(SIMDE_MUNIT_TEST_ARGS) {
8614   const struct {
8615     simde__m128i a;
8616     simde__m128i b;
8617     simde__m128i r;
8618   } test_vec[8] = {
8619     { simde_mm_set_epi8(INT8_C(-114), INT8_C(-102), INT8_C( -21), INT8_C(  93),
8620                         INT8_C(-120), INT8_C( 125), INT8_C( -36), INT8_C( -23),
8621                         INT8_C( -12), INT8_C(  11), INT8_C(  22), INT8_C(  78),
8622                         INT8_C( -16), INT8_C(  57), INT8_C(  71), INT8_C( 112)),
8623       simde_mm_set_epi8(INT8_C( -15), INT8_C( -47), INT8_C(   8), INT8_C(  73),
8624                         INT8_C(  45), INT8_C(  37), INT8_C(  27), INT8_C( -63),
8625                         INT8_C( -74), INT8_C( -76), INT8_C( -34), INT8_C(  78),
8626                         INT8_C( -50), INT8_C( 121), INT8_C(-113), INT8_C(-123)),
8627       simde_mm_set_epi8(INT8_C( -99), INT8_C( -55), INT8_C( -29), INT8_C(  20),
8628                         INT8_C(  91), INT8_C(  88), INT8_C( -63), INT8_C(  40),
8629                         INT8_C(  62), INT8_C(  87), INT8_C(  56), INT8_C(   0),
8630                         INT8_C(  34), INT8_C( -64), INT8_C( -72), INT8_C( -21)) },
8631     { simde_mm_set_epi8(INT8_C( -71), INT8_C( -77), INT8_C( -40), INT8_C(  99),
8632                         INT8_C( -40), INT8_C(  24), INT8_C(  45), INT8_C( 125),
8633                         INT8_C(  16), INT8_C(  82), INT8_C( -66), INT8_C( -93),
8634                         INT8_C(  92), INT8_C(  60), INT8_C(  65), INT8_C(  70)),
8635       simde_mm_set_epi8(INT8_C( -15), INT8_C( -11), INT8_C(  41), INT8_C(  35),
8636                         INT8_C(  87), INT8_C( -22), INT8_C( -28), INT8_C( -74),
8637                         INT8_C(  88), INT8_C(-100), INT8_C(  28), INT8_C( -30),
8638                         INT8_C( 122), INT8_C( -93), INT8_C( -11), INT8_C(  47)),
8639       simde_mm_set_epi8(INT8_C( -56), INT8_C( -66), INT8_C( -81), INT8_C(  64),
8640                         INT8_C(-127), INT8_C(  46), INT8_C(  73), INT8_C( -57),
8641                         INT8_C( -72), INT8_C( -74), INT8_C( -94), INT8_C( -63),
8642                         INT8_C( -30), INT8_C(-103), INT8_C(  76), INT8_C(  23)) },
8643     { simde_mm_set_epi8(INT8_C(  88), INT8_C(   7), INT8_C(  21), INT8_C(  40),
8644                         INT8_C( -45), INT8_C( -52), INT8_C( 105), INT8_C(   9),
8645                         INT8_C( -65), INT8_C( -48), INT8_C(  74), INT8_C( -11),
8646                         INT8_C(  71), INT8_C( -73), INT8_C( -92), INT8_C(-128)),
8647       simde_mm_set_epi8(INT8_C(  51), INT8_C( -25), INT8_C(  14), INT8_C( -31),
8648                         INT8_C( -75), INT8_C(  81), INT8_C( 123), INT8_C( -32),
8649                         INT8_C( -73), INT8_C(-121), INT8_C(  36), INT8_C( -43),
8650                         INT8_C(  95), INT8_C(  -5), INT8_C(  71), INT8_C( -67)),
8651       simde_mm_set_epi8(INT8_C(  37), INT8_C(  32), INT8_C(   7), INT8_C(  71),
8652                         INT8_C(  30), INT8_C( 123), INT8_C( -18), INT8_C(  41),
8653                         INT8_C(   8), INT8_C(  73), INT8_C(  38), INT8_C(  32),
8654                         INT8_C( -24), INT8_C( -68), INT8_C(  93), INT8_C( -61)) },
8655     { simde_mm_set_epi8(INT8_C( -26), INT8_C( -30), INT8_C(-127), INT8_C( -96),
8656                         INT8_C( -93), INT8_C(  85), INT8_C( -61), INT8_C(  31),
8657                         INT8_C(  84), INT8_C(  86), INT8_C(  14), INT8_C(  51),
8658                         INT8_C( -75), INT8_C( -80), INT8_C(  35), INT8_C(  49)),
8659       simde_mm_set_epi8(INT8_C(-102), INT8_C(  55), INT8_C( 103), INT8_C(  19),
8660                         INT8_C(-107), INT8_C( -66), INT8_C(-128), INT8_C(  92),
8661                         INT8_C(-108), INT8_C( -59), INT8_C( -55), INT8_C(  84),
8662                         INT8_C( -42), INT8_C(  42), INT8_C( -85), INT8_C( -73)),
8663       simde_mm_set_epi8(INT8_C(  76), INT8_C( -85), INT8_C(  26), INT8_C(-115),
8664                         INT8_C(  14), INT8_C(-105), INT8_C(  67), INT8_C( -61),
8665                         INT8_C( -64), INT8_C(-111), INT8_C(  69), INT8_C( -33),
8666                         INT8_C( -33), INT8_C(-122), INT8_C( 120), INT8_C( 122)) },
8667     { simde_mm_set_epi8(INT8_C( -92), INT8_C(  56), INT8_C( -22), INT8_C( -76),
8668                         INT8_C( -77), INT8_C(-116), INT8_C( -11), INT8_C(  34),
8669                         INT8_C(  -7), INT8_C(  37), INT8_C( -64), INT8_C( -72),
8670                         INT8_C(  28), INT8_C(-107), INT8_C(-128), INT8_C(-117)),
8671       simde_mm_set_epi8(INT8_C( -56), INT8_C(  -3), INT8_C(  32), INT8_C(  22),
8672                         INT8_C(  49), INT8_C(-125), INT8_C( 122), INT8_C(  -3),
8673                         INT8_C(-111), INT8_C(  65), INT8_C( -17), INT8_C(  15),
8674                         INT8_C( -83), INT8_C( -49), INT8_C(  13), INT8_C(  98)),
8675       simde_mm_set_epi8(INT8_C( -36), INT8_C(  59), INT8_C( -54), INT8_C( -98),
8676                         INT8_C(-126), INT8_C(   9), INT8_C( 123), INT8_C(  37),
8677                         INT8_C( 104), INT8_C( -28), INT8_C( -47), INT8_C( -87),
8678                         INT8_C( 111), INT8_C( -58), INT8_C( 115), INT8_C(  41)) },
8679     { simde_mm_set_epi8(INT8_C(-104), INT8_C(   9), INT8_C(  90), INT8_C( -26),
8680                         INT8_C(-114), INT8_C(-100), INT8_C( -19), INT8_C(  82),
8681                         INT8_C(  96), INT8_C(  58), INT8_C(  39), INT8_C(   9),
8682                         INT8_C(  -4), INT8_C(  91), INT8_C( -93), INT8_C( -73)),
8683       simde_mm_set_epi8(INT8_C(  16), INT8_C(   2), INT8_C(  -9), INT8_C( 107),
8684                         INT8_C(-122), INT8_C(-106), INT8_C(  -7), INT8_C(  11),
8685                         INT8_C( 116), INT8_C( -40), INT8_C(  -9), INT8_C( -94),
8686                         INT8_C(  61), INT8_C( -90), INT8_C(  69), INT8_C(   0)),
8687       simde_mm_set_epi8(INT8_C(-120), INT8_C(   7), INT8_C(  99), INT8_C( 123),
8688                         INT8_C(   8), INT8_C(   6), INT8_C( -12), INT8_C(  71),
8689                         INT8_C( -20), INT8_C(  98), INT8_C(  48), INT8_C( 103),
8690                         INT8_C( -65), INT8_C( -75), INT8_C(  94), INT8_C( -73)) },
8691     { simde_mm_set_epi8(INT8_C( -61), INT8_C( -71), INT8_C( 103), INT8_C(  76),
8692                         INT8_C(  44), INT8_C(  98), INT8_C(  70), INT8_C(-120),
8693                         INT8_C(  17), INT8_C( 126), INT8_C( -43), INT8_C( 108),
8694                         INT8_C( -31), INT8_C(  12), INT8_C( -92), INT8_C( -28)),
8695       simde_mm_set_epi8(INT8_C(-114), INT8_C(  71), INT8_C(  -5), INT8_C(  -9),
8696                         INT8_C(  -6), INT8_C( 117), INT8_C( -23), INT8_C( -62),
8697                         INT8_C( -10), INT8_C( -22), INT8_C( 106), INT8_C(  35),
8698                         INT8_C( -63), INT8_C(  18), INT8_C(  58), INT8_C(  22)),
8699       simde_mm_set_epi8(INT8_C(  53), INT8_C( 114), INT8_C( 108), INT8_C(  85),
8700                         INT8_C(  50), INT8_C( -19), INT8_C(  93), INT8_C( -58),
8701                         INT8_C(  27), INT8_C(-108), INT8_C( 107), INT8_C(  73),
8702                         INT8_C(  32), INT8_C(  -6), INT8_C( 106), INT8_C( -50)) },
8703     { simde_mm_set_epi8(INT8_C(  19), INT8_C( -54), INT8_C(  71), INT8_C(   0),
8704                         INT8_C( -13), INT8_C(  85), INT8_C( 113), INT8_C(   7),
8705                         INT8_C( -78), INT8_C(-122), INT8_C( -69), INT8_C( -15),
8706                         INT8_C( -57), INT8_C(  -9), INT8_C(-125), INT8_C(  84)),
8707       simde_mm_set_epi8(INT8_C( -78), INT8_C( 106), INT8_C(-106), INT8_C(  60),
8708                         INT8_C(  36), INT8_C( 103), INT8_C( -55), INT8_C(  69),
8709                         INT8_C(-119), INT8_C( -53), INT8_C(  67), INT8_C( -86),
8710                         INT8_C( -37), INT8_C( -20), INT8_C( -58), INT8_C( -28)),
8711       simde_mm_set_epi8(INT8_C(  97), INT8_C(  96), INT8_C( -79), INT8_C( -60),
8712                         INT8_C( -49), INT8_C( -18), INT8_C( -88), INT8_C( -62),
8713                         INT8_C(  41), INT8_C( -69), INT8_C( 120), INT8_C(  71),
8714                         INT8_C( -20), INT8_C(  11), INT8_C( -67), INT8_C( 112)) }
8715   };
8716 
8717   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8718     simde__m128i r = simde_mm_sub_epi8(test_vec[i].a, test_vec[i].b);
8719     simde_assert_m128i_i8(r, ==, test_vec[i].r);
8720   }
8721 
8722   return 0;
8723 }
8724 
8725 static int
8726 test_simde_mm_sub_epi16(SIMDE_MUNIT_TEST_ARGS) {
8727   const struct {
8728     simde__m128i a;
8729     simde__m128i b;
8730     simde__m128i r;
8731   } test_vec[8] = {
8732     { simde_mm_set_epi16(INT16_C(  4649), INT16_C(-10562), INT16_C(-25917), INT16_C(  9425),
8733                          INT16_C( 27983), INT16_C( -7888), INT16_C(  3337), INT16_C(-19608)),
8734       simde_mm_set_epi16(INT16_C(-13637), INT16_C( -2631), INT16_C( 26607), INT16_C( 19784),
8735                          INT16_C(-32723), INT16_C(-19066), INT16_C( 18245), INT16_C(-23690)),
8736       simde_mm_set_epi16(INT16_C( 18286), INT16_C( -7931), INT16_C( 13012), INT16_C(-10359),
8737                          INT16_C( -4830), INT16_C( 11178), INT16_C(-14908), INT16_C(  4082)) },
8738     { simde_mm_set_epi16(INT16_C(   708), INT16_C( 11434), INT16_C( -1239), INT16_C(-25521),
8739                          INT16_C(-21333), INT16_C( 14389), INT16_C(  1705), INT16_C( 20680)),
8740       simde_mm_set_epi16(INT16_C(-28483), INT16_C(  8156), INT16_C(-22073), INT16_C( 17984),
8741                          INT16_C( 20902), INT16_C(  3569), INT16_C( 31387), INT16_C(  7806)),
8742       simde_mm_set_epi16(INT16_C( 29191), INT16_C(  3278), INT16_C( 20834), INT16_C( 22031),
8743                          INT16_C( 23301), INT16_C( 10820), INT16_C(-29682), INT16_C( 12874)) },
8744     { simde_mm_set_epi16(INT16_C( -3626), INT16_C(   757), INT16_C(   189), INT16_C(-19968),
8745                          INT16_C(  5676), INT16_C(  7663), INT16_C(  8524), INT16_C( 15372)),
8746       simde_mm_set_epi16(INT16_C( 20254), INT16_C(-31977), INT16_C( 18332), INT16_C(-14379),
8747                          INT16_C( -7613), INT16_C( 19737), INT16_C( 22035), INT16_C( -6952)),
8748       simde_mm_set_epi16(INT16_C(-23880), INT16_C( 32734), INT16_C(-18143), INT16_C( -5589),
8749                          INT16_C( 13289), INT16_C(-12074), INT16_C(-13511), INT16_C( 22324)) },
8750     { simde_mm_set_epi16(INT16_C(-12411), INT16_C( 25999), INT16_C(  8485), INT16_C( -8542),
8751                          INT16_C( 21018), INT16_C(-31213), INT16_C( 15766), INT16_C( 18574)),
8752       simde_mm_set_epi16(INT16_C(  6484), INT16_C(-10154), INT16_C( 20175), INT16_C( 32085),
8753                          INT16_C( 18950), INT16_C(-19405), INT16_C(-12089), INT16_C(  8199)),
8754       simde_mm_set_epi16(INT16_C(-18895), INT16_C(-29383), INT16_C(-11690), INT16_C( 24909),
8755                          INT16_C(  2068), INT16_C(-11808), INT16_C( 27855), INT16_C( 10375)) },
8756     { simde_mm_set_epi16(INT16_C(  7148), INT16_C(-25537), INT16_C(  5647), INT16_C(-25529),
8757                          INT16_C( -5324), INT16_C(-12025), INT16_C( 27072), INT16_C(-30360)),
8758       simde_mm_set_epi16(INT16_C(-24506), INT16_C( -9630), INT16_C( 25801), INT16_C( 32734),
8759                          INT16_C(  1516), INT16_C( 10059), INT16_C( 10693), INT16_C( 13623)),
8760       simde_mm_set_epi16(INT16_C( 31654), INT16_C(-15907), INT16_C(-20154), INT16_C(  7273),
8761                          INT16_C( -6840), INT16_C(-22084), INT16_C( 16379), INT16_C( 21553)) },
8762     { simde_mm_set_epi16(INT16_C(-24730), INT16_C(-23496), INT16_C(-16567), INT16_C(-13323),
8763                          INT16_C(-12986), INT16_C(-31808), INT16_C( 27730), INT16_C( -2264)),
8764       simde_mm_set_epi16(INT16_C(-13737), INT16_C(-18451), INT16_C(-16289), INT16_C( 22307),
8765                          INT16_C( -2961), INT16_C( 22412), INT16_C( 13917), INT16_C(  8259)),
8766       simde_mm_set_epi16(INT16_C(-10993), INT16_C( -5045), INT16_C(  -278), INT16_C( 29906),
8767                          INT16_C(-10025), INT16_C( 11316), INT16_C( 13813), INT16_C(-10523)) },
8768     { simde_mm_set_epi16(INT16_C(  5718), INT16_C( 31027), INT16_C( 29094), INT16_C(  1906),
8769                          INT16_C( -3938), INT16_C( -2339), INT16_C(-13536), INT16_C( 11931)),
8770       simde_mm_set_epi16(INT16_C(-23545), INT16_C(  2546), INT16_C( -2953), INT16_C( -8072),
8771                          INT16_C( 28237), INT16_C(-11239), INT16_C(-13996), INT16_C( 29497)),
8772       simde_mm_set_epi16(INT16_C( 29263), INT16_C( 28481), INT16_C( 32047), INT16_C(  9978),
8773                          INT16_C(-32175), INT16_C(  8900), INT16_C(   460), INT16_C(-17566)) },
8774     { simde_mm_set_epi16(INT16_C( 29491), INT16_C(-30965), INT16_C(  4748), INT16_C(-28809),
8775                          INT16_C(-21877), INT16_C(-21669), INT16_C(-28233), INT16_C(-28758)),
8776       simde_mm_set_epi16(INT16_C(  5029), INT16_C(  4694), INT16_C(-16956), INT16_C(-15561),
8777                          INT16_C(-23049), INT16_C(-31774), INT16_C(  3835), INT16_C(-12557)),
8778       simde_mm_set_epi16(INT16_C( 24462), INT16_C( 29877), INT16_C( 21704), INT16_C(-13248),
8779                          INT16_C(  1172), INT16_C( 10105), INT16_C(-32068), INT16_C(-16201)) }
8780   };
8781 
8782   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8783     simde__m128i r = simde_mm_sub_epi16(test_vec[i].a, test_vec[i].b);
8784     simde_assert_m128i_i16(r, ==, test_vec[i].r);
8785   }
8786 
8787   return 0;
8788 }
8789 
8790 static int
8791 test_simde_mm_sub_epi32(SIMDE_MUNIT_TEST_ARGS) {
8792   const struct {
8793     simde__m128i a;
8794     simde__m128i b;
8795     simde__m128i r;
8796   } test_vec[8] = {
8797     { simde_mm_set_epi32(  304731838, -1698487087,  1833951536,   218739560),
8798       simde_mm_set_epi32( -893651527,  1743736136, -2144488058,  1195746166),
8799       simde_mm_set_epi32( 1198383365,   852744073,  -316527702,  -977006606) },
8800     { simde_mm_set_epi32(   46410922,   -81159089, -1398065099,   111759560),
8801       simde_mm_set_epi32(-1866653732, -1446558144,  1369837041,  2056986238),
8802       simde_mm_set_epi32( 1913064654,  1365399055,  1527065156, -1945226678) },
8803     { simde_mm_set_epi32( -237632779,    12431872,   371989999,   558644236),
8804       simde_mm_set_epi32( 1327399703,  1201457109,  -498905831,  1444144344),
8805       simde_mm_set_epi32(-1565032482, -1189025237,   870895830,  -885500108) },
8806     { simde_mm_set_epi32( -813341297,   556129954,  1377469971,  1033259150),
8807       simde_mm_set_epi32(  424990806,  1322220885,  1241953331,  -792256505),
8808       simde_mm_set_epi32(-1238332103,  -766090931,   135516640,  1825515655) },
8809     { simde_mm_set_epi32(  468491327,   370121799,  -348860153,  1774225768),
8810       simde_mm_set_epi32(-1605969310,  1690927070,    99362635,   700790071),
8811       simde_mm_set_epi32( 2074460637, -1320805271,  -448222788,  1073435697) },
8812     { simde_mm_set_epi32(-1620663240, -1085682699,  -851016768,  1817376552),
8813       simde_mm_set_epi32( -900220947, -1067493597,  -194029684,   912072771),
8814       simde_mm_set_epi32( -720442293,   -18189102,  -656987084,   905303781) },
8815     { simde_mm_set_epi32(  374765875,  1906706290,  -258017571,  -887083365),
8816       simde_mm_set_epi32(-1543042574,  -193470344,  1850594329,  -917212359),
8817       simde_mm_set_epi32( 1917808449,  2100176634, -2108611900,    30128994) },
8818     { simde_mm_set_epi32( 1932756747,   311201655, -1433687205, -1850241110),
8819       simde_mm_set_epi32(  329585238, -1111178441, -1510505502,   251383539),
8820       simde_mm_set_epi32( 1603171509,  1422380096,    76818297, -2101624649) }
8821   };
8822 
8823   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8824     simde__m128i r = simde_mm_sub_epi32(test_vec[i].a, test_vec[i].b);
8825     simde_assert_m128i_i32(r, ==, test_vec[i].r);
8826   }
8827 
8828   return 0;
8829 }
8830 
8831 static int
8832 test_simde_mm_sub_epi64(SIMDE_MUNIT_TEST_ARGS) {
8833   const struct {
8834     simde__m128i a;
8835     simde__m128i b;
8836     simde__m128i r;
8837   } test_vec[8] = {
8838     { simde_mm_set_epi64x(INT64_C(-5763845342482697816), INT64_C( 2103077785434280804)),
8839       simde_mm_set_epi64x(INT64_C(  -84933559585222060), INT64_C(-2626653918467514964)),
8840       simde_mm_set_epi64x(INT64_C(-5678911782897475756), INT64_C( 4729731703901795768)) },
8841     { simde_mm_set_epi64x(INT64_C( 1527789798480118137), INT64_C( 8436112421047310932)),
8842       simde_mm_set_epi64x(INT64_C(-1700732467797798250), INT64_C(-3973336518996013340)),
8843       simde_mm_set_epi64x(INT64_C( 3228522266277916387), INT64_C(-6037295133666227344)) },
8844     { simde_mm_set_epi64x(INT64_C( 4851345631989659335), INT64_C(-7206764788471565568)),
8845       simde_mm_set_epi64x(INT64_C( 5406657072094052149), INT64_C( 1553917979932899417)),
8846       simde_mm_set_epi64x(INT64_C( -555311440104392814), INT64_C(-8760682768404464985)) },
8847     { simde_mm_set_epi64x(INT64_C( 4880585840903485916), INT64_C(-3214111508108965857)),
8848       simde_mm_set_epi64x(INT64_C(-9030069389987018552), INT64_C(-3395779442469856546)),
8849       simde_mm_set_epi64x(INT64_C(-4536088842819047148), INT64_C(  181667934360890689)) },
8850     { simde_mm_set_epi64x(INT64_C( 5848110560047382754), INT64_C( 5491947693722128435)),
8851       simde_mm_set_epi64x(INT64_C(  213782131019667117), INT64_C( -937970910639813333)),
8852       simde_mm_set_epi64x(INT64_C( 5634328429027715637), INT64_C( 6429918604361941768)) },
8853     { simde_mm_set_epi64x(INT64_C(-1470278109522038956), INT64_C(-7185464081677005028)),
8854       simde_mm_set_epi64x(INT64_C( 7512013344600346304), INT64_C(-1151368750409397152)),
8855       simde_mm_set_epi64x(INT64_C(-8982291454122385260), INT64_C(-6034095331267607876)) },
8856     { simde_mm_set_epi64x(INT64_C(-1414880571892272072), INT64_C(-5464559564131319132)),
8857       simde_mm_set_epi64x(INT64_C(-8460263392275774431), INT64_C( 3444946385257741717)),
8858       simde_mm_set_epi64x(INT64_C( 7045382820383502359), INT64_C(-8909505949389060849)) },
8859     { simde_mm_set_epi64x(INT64_C( 2041037654020608990), INT64_C(-5135476174064773616)),
8860       simde_mm_set_epi64x(INT64_C(-2250411574230731306), INT64_C( 6301008926808412830)),
8861       simde_mm_set_epi64x(INT64_C( 4291449228251340296), INT64_C( 7010258972836365170)) }
8862   };
8863 
8864   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8865     simde__m128i r = simde_mm_sub_epi64(test_vec[i].a, test_vec[i].b);
8866     simde_assert_m128i_i64(r, ==, test_vec[i].r);
8867   }
8868 
8869   return 0;
8870 }
8871 
8872 static int
8873 test_simde_mm_sub_pd(SIMDE_MUNIT_TEST_ARGS) {
8874   const struct {
8875     simde__m128d a;
8876     simde__m128d b;
8877     simde__m128d r;
8878   } test_vec[8] = {
8879     { simde_mm_set_pd(SIMDE_FLOAT64_C( -989.09), SIMDE_FLOAT64_C(  415.70)),
8880       simde_mm_set_pd(SIMDE_FLOAT64_C( -630.71), SIMDE_FLOAT64_C(  755.53)),
8881       simde_mm_set_pd(SIMDE_FLOAT64_C( -358.38), SIMDE_FLOAT64_C( -339.83)) },
8882     { simde_mm_set_pd(SIMDE_FLOAT64_C( -609.69), SIMDE_FLOAT64_C( -266.09)),
8883       simde_mm_set_pd(SIMDE_FLOAT64_C(  904.74), SIMDE_FLOAT64_C(  704.00)),
8884       simde_mm_set_pd(SIMDE_FLOAT64_C(-1514.43), SIMDE_FLOAT64_C( -970.09)) },
8885     { simde_mm_set_pd(SIMDE_FLOAT64_C( -864.69), SIMDE_FLOAT64_C( -728.75)),
8886       simde_mm_set_pd(SIMDE_FLOAT64_C( -549.96), SIMDE_FLOAT64_C(  478.05)),
8887       simde_mm_set_pd(SIMDE_FLOAT64_C( -314.73), SIMDE_FLOAT64_C(-1206.80)) },
8888     { simde_mm_set_pd(SIMDE_FLOAT64_C( -607.45), SIMDE_FLOAT64_C( -593.32)),
8889       simde_mm_set_pd(SIMDE_FLOAT64_C( -648.70), SIMDE_FLOAT64_C( -195.04)),
8890       simde_mm_set_pd(SIMDE_FLOAT64_C(   41.24), SIMDE_FLOAT64_C( -398.28)) },
8891     { simde_mm_set_pd(SIMDE_FLOAT64_C( -442.58), SIMDE_FLOAT64_C( -296.11)),
8892       simde_mm_set_pd(SIMDE_FLOAT64_C(  195.46), SIMDE_FLOAT64_C(  287.25)),
8893       simde_mm_set_pd(SIMDE_FLOAT64_C( -638.04), SIMDE_FLOAT64_C( -583.37)) },
8894     { simde_mm_set_pd(SIMDE_FLOAT64_C( -930.71), SIMDE_FLOAT64_C(  996.22)),
8895       simde_mm_set_pd(SIMDE_FLOAT64_C( -786.74), SIMDE_FLOAT64_C(   77.74)),
8896       simde_mm_set_pd(SIMDE_FLOAT64_C( -143.98), SIMDE_FLOAT64_C(  918.47)) },
8897     { simde_mm_set_pd(SIMDE_FLOAT64_C(  702.75), SIMDE_FLOAT64_C(  -28.87)),
8898       simde_mm_set_pd(SIMDE_FLOAT64_C(  970.37), SIMDE_FLOAT64_C( -443.97)),
8899       simde_mm_set_pd(SIMDE_FLOAT64_C( -267.62), SIMDE_FLOAT64_C(  415.10)) },
8900     { simde_mm_set_pd(SIMDE_FLOAT64_C( -369.79), SIMDE_FLOAT64_C(  539.64)),
8901       simde_mm_set_pd(SIMDE_FLOAT64_C( -404.57), SIMDE_FLOAT64_C( -587.93)),
8902       simde_mm_set_pd(SIMDE_FLOAT64_C(   34.78), SIMDE_FLOAT64_C( 1127.56)) }
8903   };
8904 
8905   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8906     simde__m128d r = simde_mm_sub_pd(test_vec[i].a, test_vec[i].b);
8907     simde_assert_m128d_close(r, test_vec[i].r, 1);
8908   }
8909 
8910   return 0;
8911 }
8912 
8913 static int
8914 test_simde_mm_sub_sd(SIMDE_MUNIT_TEST_ARGS) {
8915   const struct {
8916     simde__m128d a;
8917     simde__m128d b;
8918     simde__m128d r;
8919   } test_vec[8] = {
8920     { simde_mm_set_pd(SIMDE_FLOAT64_C( -989.09), SIMDE_FLOAT64_C(  415.70)),
8921       simde_mm_set_pd(SIMDE_FLOAT64_C( -630.71), SIMDE_FLOAT64_C(  755.53)),
8922       simde_mm_set_pd(SIMDE_FLOAT64_C( -989.09), SIMDE_FLOAT64_C( -339.83)) },
8923     { simde_mm_set_pd(SIMDE_FLOAT64_C( -609.69), SIMDE_FLOAT64_C( -266.09)),
8924       simde_mm_set_pd(SIMDE_FLOAT64_C(  904.74), SIMDE_FLOAT64_C(  704.00)),
8925       simde_mm_set_pd(SIMDE_FLOAT64_C( -609.69), SIMDE_FLOAT64_C( -970.09)) },
8926     { simde_mm_set_pd(SIMDE_FLOAT64_C( -864.69), SIMDE_FLOAT64_C( -728.75)),
8927       simde_mm_set_pd(SIMDE_FLOAT64_C( -549.96), SIMDE_FLOAT64_C(  478.05)),
8928       simde_mm_set_pd(SIMDE_FLOAT64_C( -864.69), SIMDE_FLOAT64_C(-1206.80)) },
8929     { simde_mm_set_pd(SIMDE_FLOAT64_C( -607.45), SIMDE_FLOAT64_C( -593.32)),
8930       simde_mm_set_pd(SIMDE_FLOAT64_C( -648.70), SIMDE_FLOAT64_C( -195.04)),
8931       simde_mm_set_pd(SIMDE_FLOAT64_C( -607.45), SIMDE_FLOAT64_C( -398.28)) },
8932     { simde_mm_set_pd(SIMDE_FLOAT64_C( -442.58), SIMDE_FLOAT64_C( -296.11)),
8933       simde_mm_set_pd(SIMDE_FLOAT64_C(  195.46), SIMDE_FLOAT64_C(  287.25)),
8934       simde_mm_set_pd(SIMDE_FLOAT64_C( -442.58), SIMDE_FLOAT64_C( -583.37)) },
8935     { simde_mm_set_pd(SIMDE_FLOAT64_C( -930.71), SIMDE_FLOAT64_C(  996.22)),
8936       simde_mm_set_pd(SIMDE_FLOAT64_C( -786.74), SIMDE_FLOAT64_C(   77.74)),
8937       simde_mm_set_pd(SIMDE_FLOAT64_C( -930.71), SIMDE_FLOAT64_C(  918.47)) },
8938     { simde_mm_set_pd(SIMDE_FLOAT64_C(  702.75), SIMDE_FLOAT64_C(  -28.87)),
8939       simde_mm_set_pd(SIMDE_FLOAT64_C(  970.37), SIMDE_FLOAT64_C( -443.97)),
8940       simde_mm_set_pd(SIMDE_FLOAT64_C(  702.75), SIMDE_FLOAT64_C(  415.10)) },
8941     { simde_mm_set_pd(SIMDE_FLOAT64_C( -369.79), SIMDE_FLOAT64_C(  539.64)),
8942       simde_mm_set_pd(SIMDE_FLOAT64_C( -404.57), SIMDE_FLOAT64_C( -587.93)),
8943       simde_mm_set_pd(SIMDE_FLOAT64_C( -369.79), SIMDE_FLOAT64_C( 1127.56)) }
8944   };
8945 
8946   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8947     simde__m128d r = simde_mm_sub_sd(test_vec[i].a, test_vec[i].b);
8948     simde_assert_m128d_close(r, test_vec[i].r, 1);
8949   }
8950 
8951   return 0;
8952 }
8953 
8954 static int
8955 test_simde_mm_sub_si64(SIMDE_MUNIT_TEST_ARGS) {
8956   const struct {
8957     simde__m64 a;
8958     simde__m64 b;
8959     simde__m64 r;
8960   } test_vec[8] = {
8961     { simde_mm_cvtsi64_m64(INT64_C(  -40015113898169895)),
8962       simde_mm_cvtsi64_m64(INT64_C( 3843942487505240466)),
8963       simde_mm_cvtsi64_m64(INT64_C(-3883957601403410361)), },
8964     { simde_mm_cvtsi64_m64(INT64_C( 8317116700671824816)),
8965       simde_mm_cvtsi64_m64(INT64_C( 2891842609034633421)),
8966       simde_mm_cvtsi64_m64(INT64_C( 5425274091637191395)), },
8967     { simde_mm_cvtsi64_m64(INT64_C(  922042182678065366)),
8968       simde_mm_cvtsi64_m64(INT64_C( 4937799652981992213)),
8969       simde_mm_cvtsi64_m64(INT64_C(-4015757470303926847)), },
8970     { simde_mm_cvtsi64_m64(INT64_C(  297526191920431793)),
8971       simde_mm_cvtsi64_m64(INT64_C(-8568639315346032946)),
8972       simde_mm_cvtsi64_m64(INT64_C( 8866165507266464739)), },
8973     { simde_mm_cvtsi64_m64(INT64_C(  944913740190663659)),
8974       simde_mm_cvtsi64_m64(INT64_C(-5569388163200780530)),
8975       simde_mm_cvtsi64_m64(INT64_C( 6514301903391444189)), },
8976     { simde_mm_cvtsi64_m64(INT64_C( 2756927115722410076)),
8977       simde_mm_cvtsi64_m64(INT64_C( 1302679549898517242)),
8978       simde_mm_cvtsi64_m64(INT64_C( 1454247565823892834)), },
8979     { simde_mm_cvtsi64_m64(INT64_C(  977005230827305840)),
8980       simde_mm_cvtsi64_m64(INT64_C( 4908563834369883454)),
8981       simde_mm_cvtsi64_m64(INT64_C(-3931558603542577614)), },
8982     { simde_mm_cvtsi64_m64(INT64_C(-7062092201406124762)),
8983       simde_mm_cvtsi64_m64(INT64_C( 2377066878085823882)),
8984       simde_mm_cvtsi64_m64(INT64_C( 9007584994217602972)), }
8985   };
8986 
8987   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
8988     simde__m64 r = simde_mm_sub_si64(test_vec[i].a, test_vec[i].b);
8989     simde_assert_m64_i64(r, ==, test_vec[i].r);
8990   }
8991 
8992   return 0;
8993 }
8994 
8995 static int
8996 test_simde_mm_subs_epi8(SIMDE_MUNIT_TEST_ARGS) {
8997   const struct {
8998     simde__m128i a;
8999     simde__m128i b;
9000     simde__m128i r;
9001   } test_vec[8] = {
9002     { simde_mm_set_epi8(INT8_C(  77), INT8_C( -15), INT8_C( -74), INT8_C(  52), INT8_C( -61), INT8_C(  64), INT8_C(  59), INT8_C(  21),
9003                         INT8_C( -60), INT8_C( -67), INT8_C( -73), INT8_C( 104), INT8_C(  30), INT8_C( 107), INT8_C(  83), INT8_C(-117)),
9004       simde_mm_set_epi8(INT8_C(-110), INT8_C(-112), INT8_C( -53), INT8_C(  89), INT8_C(  74), INT8_C(  81), INT8_C( -22), INT8_C( -97),
9005                         INT8_C(-128), INT8_C( -54), INT8_C( 101), INT8_C(  96), INT8_C(  36), INT8_C(  87), INT8_C(-125), INT8_C(  28)),
9006       simde_mm_set_epi8(INT8_C( 127), INT8_C(  97), INT8_C( -21), INT8_C( -37), INT8_C(-128), INT8_C( -17), INT8_C(  81), INT8_C( 118),
9007                         INT8_C(  68), INT8_C( -13), INT8_C(-128), INT8_C(   8), INT8_C(  -6), INT8_C(  20), INT8_C( 127), INT8_C(-128)) },
9008     { simde_mm_set_epi8(INT8_C(  57), INT8_C(  79), INT8_C( 101), INT8_C(  47), INT8_C(  60), INT8_C(  12), INT8_C(   0), INT8_C( -19),
9009                         INT8_C(  63), INT8_C(  39), INT8_C(-108), INT8_C(  37), INT8_C(  92), INT8_C( 114), INT8_C(-110), INT8_C(  91)),
9010       simde_mm_set_epi8(INT8_C( -59), INT8_C( -81), INT8_C(  49), INT8_C( 126), INT8_C(  33), INT8_C( 120), INT8_C(-127), INT8_C(  80),
9011                         INT8_C( 109), INT8_C(-100), INT8_C(  21), INT8_C(-125), INT8_C(   7), INT8_C(  60), INT8_C(-122), INT8_C( -61)),
9012       simde_mm_set_epi8(INT8_C( 116), INT8_C( 127), INT8_C(  52), INT8_C( -79), INT8_C(  27), INT8_C(-108), INT8_C( 127), INT8_C( -99),
9013                         INT8_C( -46), INT8_C( 127), INT8_C(-128), INT8_C( 127), INT8_C(  85), INT8_C(  54), INT8_C(  12), INT8_C( 127)) },
9014     { simde_mm_set_epi8(INT8_C(  84), INT8_C(-104), INT8_C( -82), INT8_C( 105), INT8_C( -43), INT8_C( -36), INT8_C(  16), INT8_C( -15),
9015                         INT8_C( -34), INT8_C( 120), INT8_C(-110), INT8_C(  90), INT8_C(  78), INT8_C(  45), INT8_C(-124), INT8_C( -84)),
9016       simde_mm_set_epi8(INT8_C( -66), INT8_C(  -1), INT8_C(  91), INT8_C(  74), INT8_C(  83), INT8_C( -91), INT8_C( -97), INT8_C( 115),
9017                         INT8_C( -29), INT8_C(  67), INT8_C( -98), INT8_C( -51), INT8_C( 110), INT8_C( -43), INT8_C( 125), INT8_C(  63)),
9018       simde_mm_set_epi8(INT8_C( 127), INT8_C(-103), INT8_C(-128), INT8_C(  31), INT8_C(-126), INT8_C(  55), INT8_C( 113), INT8_C(-128),
9019                         INT8_C(  -5), INT8_C(  53), INT8_C( -12), INT8_C( 127), INT8_C( -32), INT8_C(  88), INT8_C(-128), INT8_C(-128)) },
9020     { simde_mm_set_epi8(INT8_C( -75), INT8_C(  37), INT8_C( 126), INT8_C(  21), INT8_C(  92), INT8_C(-124), INT8_C( -81), INT8_C(  -6),
9021                         INT8_C(-117), INT8_C( -14), INT8_C(  38), INT8_C( -68), INT8_C( -45), INT8_C( 114), INT8_C(  32), INT8_C( -13)),
9022       simde_mm_set_epi8(INT8_C(  80), INT8_C(-123), INT8_C( -25), INT8_C(  71), INT8_C(-108), INT8_C( -31), INT8_C(  98), INT8_C( -67),
9023                         INT8_C( -23), INT8_C(-112), INT8_C( -42), INT8_C( -16), INT8_C( -56), INT8_C( 107), INT8_C(   6), INT8_C(  16)),
9024       simde_mm_set_epi8(INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( -50), INT8_C( 127), INT8_C( -93), INT8_C(-128), INT8_C(  61),
9025                         INT8_C( -94), INT8_C(  98), INT8_C(  80), INT8_C( -52), INT8_C(  11), INT8_C(   7), INT8_C(  26), INT8_C( -29)) },
9026     { simde_mm_set_epi8(INT8_C(  85), INT8_C(  18), INT8_C(  15), INT8_C( 100), INT8_C( 107), INT8_C( -69), INT8_C( -96), INT8_C( -20),
9027                         INT8_C( -18), INT8_C(  42), INT8_C(  98), INT8_C( 104), INT8_C( -70), INT8_C(-121), INT8_C( -91), INT8_C(  77)),
9028       simde_mm_set_epi8(INT8_C( 103), INT8_C( -62), INT8_C( 107), INT8_C(-125), INT8_C( -86), INT8_C(-112), INT8_C( -45), INT8_C(   3),
9029                         INT8_C( -26), INT8_C(  96), INT8_C(  83), INT8_C(  23), INT8_C( 100), INT8_C( 127), INT8_C( -56), INT8_C( -52)),
9030       simde_mm_set_epi8(INT8_C( -18), INT8_C(  80), INT8_C( -92), INT8_C( 127), INT8_C( 127), INT8_C(  43), INT8_C( -51), INT8_C( -23),
9031                         INT8_C(   8), INT8_C( -54), INT8_C(  15), INT8_C(  81), INT8_C(-128), INT8_C(-128), INT8_C( -35), INT8_C( 127)) },
9032     { simde_mm_set_epi8(INT8_C(  63), INT8_C(  16), INT8_C( 100), INT8_C( -10), INT8_C(  78), INT8_C( 116), INT8_C( -91), INT8_C(  21),
9033                         INT8_C( -10), INT8_C( -27), INT8_C( -92), INT8_C(  31), INT8_C( -23), INT8_C( -53), INT8_C(  -1), INT8_C(  -1)),
9034       simde_mm_set_epi8(INT8_C(  20), INT8_C(-123), INT8_C(  36), INT8_C( -10), INT8_C( 127), INT8_C(-111), INT8_C( -60), INT8_C(  54),
9035                         INT8_C(  92), INT8_C( 101), INT8_C( -13), INT8_C( -31), INT8_C(-124), INT8_C( 112), INT8_C(-118), INT8_C( -29)),
9036       simde_mm_set_epi8(INT8_C(  43), INT8_C( 127), INT8_C(  64), INT8_C(   0), INT8_C( -49), INT8_C( 127), INT8_C( -31), INT8_C( -33),
9037                         INT8_C(-102), INT8_C(-128), INT8_C( -79), INT8_C(  62), INT8_C( 101), INT8_C(-128), INT8_C( 117), INT8_C(  28)) },
9038     { simde_mm_set_epi8(INT8_C(   1), INT8_C( -28), INT8_C( -45), INT8_C( -32), INT8_C(-103), INT8_C(  27), INT8_C( -38), INT8_C(-127),
9039                         INT8_C( -89), INT8_C( -74), INT8_C(  47), INT8_C(  91), INT8_C(  46), INT8_C( -24), INT8_C(  60), INT8_C(  23)),
9040       simde_mm_set_epi8(INT8_C( -25), INT8_C( -68), INT8_C(-116), INT8_C(  92), INT8_C(  33), INT8_C(  -5), INT8_C( -35), INT8_C( -44),
9041                         INT8_C(  -9), INT8_C( -90), INT8_C(  63), INT8_C( 108), INT8_C(  36), INT8_C(  27), INT8_C( 112), INT8_C( -11)),
9042       simde_mm_set_epi8(INT8_C(  26), INT8_C(  40), INT8_C(  71), INT8_C(-124), INT8_C(-128), INT8_C(  32), INT8_C(  -3), INT8_C( -83),
9043                         INT8_C( -80), INT8_C(  16), INT8_C( -16), INT8_C( -17), INT8_C(  10), INT8_C( -51), INT8_C( -52), INT8_C(  34)) },
9044     { simde_mm_set_epi8(INT8_C(  29), INT8_C( 123), INT8_C(  -8), INT8_C( -35), INT8_C(   3), INT8_C( -97), INT8_C( 124), INT8_C(-121),
9045                         INT8_C(  52), INT8_C(  75), INT8_C( -93), INT8_C(-127), INT8_C( -78), INT8_C(  87), INT8_C( 102), INT8_C( 119)),
9046       simde_mm_set_epi8(INT8_C(  51), INT8_C( -89), INT8_C(  -6), INT8_C(   8), INT8_C( -19), INT8_C( -88), INT8_C(  22), INT8_C(  21),
9047                         INT8_C( -37), INT8_C( -42), INT8_C( -97), INT8_C(  58), INT8_C(  70), INT8_C( -92), INT8_C(-100), INT8_C(-124)),
9048       simde_mm_set_epi8(INT8_C( -22), INT8_C( 127), INT8_C(  -2), INT8_C( -43), INT8_C(  22), INT8_C(  -9), INT8_C( 102), INT8_C(-128),
9049                         INT8_C(  89), INT8_C( 117), INT8_C(   4), INT8_C(-128), INT8_C(-128), INT8_C( 127), INT8_C( 127), INT8_C( 127)) }
9050   };
9051 
9052   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9053     simde__m128i r = simde_mm_subs_epi8(test_vec[i].a, test_vec[i].b);
9054     simde_assert_m128i_i8(r, ==, test_vec[i].r);
9055   }
9056 
9057   return 0;
9058 }
9059 
9060 static int
9061 test_simde_mm_subs_epi16(SIMDE_MUNIT_TEST_ARGS) {
9062   const struct {
9063     simde__m128i a;
9064     simde__m128i b;
9065     simde__m128i r;
9066   } test_vec[8] = {
9067     { simde_mm_set_epi16(INT16_C(  3087), INT16_C(-11046), INT16_C( 16009), INT16_C( -2784),
9068                          INT16_C( 23836), INT16_C(   341), INT16_C( 25120), INT16_C(   792)),
9069       simde_mm_set_epi16(INT16_C(-13938), INT16_C( 11942), INT16_C( 18967), INT16_C(-24666),
9070                          INT16_C(-12194), INT16_C(-15141), INT16_C( 27868), INT16_C(  7667)),
9071       simde_mm_set_epi16(INT16_C( 17025), INT16_C(-22988), INT16_C( -2958), INT16_C( 21882),
9072                          INT16_C( 32767), INT16_C( 15482), INT16_C( -2748), INT16_C( -6875)) },
9073     { simde_mm_set_epi16(INT16_C( 15944), INT16_C( 21174), INT16_C(-19487), INT16_C( 30166),
9074                          INT16_C(  9880), INT16_C(  2293), INT16_C(  1544), INT16_C(  6216)),
9075       simde_mm_set_epi16(INT16_C(-22637), INT16_C( 27460), INT16_C( 16112), INT16_C(-21899),
9076                          INT16_C( 28784), INT16_C(  -234), INT16_C( -5361), INT16_C( 25377)),
9077       simde_mm_set_epi16(INT16_C( 32767), INT16_C( -6286), INT16_C(-32768), INT16_C( 32767),
9078                          INT16_C(-18904), INT16_C(  2527), INT16_C(  6905), INT16_C(-19161)) },
9079     { simde_mm_set_epi16(INT16_C( 25177), INT16_C( 16000), INT16_C(-30398), INT16_C(-17760),
9080                          INT16_C( 16727), INT16_C( -4856), INT16_C(-10813), INT16_C( 11418)),
9081       simde_mm_set_epi16(INT16_C( 25832), INT16_C(-14964), INT16_C( 17267), INT16_C( -2360),
9082                          INT16_C( 15960), INT16_C( 12601), INT16_C(  9707), INT16_C( 24108)),
9083       simde_mm_set_epi16(INT16_C(  -655), INT16_C( 30964), INT16_C(-32768), INT16_C(-15400),
9084                          INT16_C(   767), INT16_C(-17457), INT16_C(-20520), INT16_C(-12690)) },
9085     { simde_mm_set_epi16(INT16_C(-19601), INT16_C(-21914), INT16_C(-30623), INT16_C( -8160),
9086                          INT16_C( 24427), INT16_C(-16073), INT16_C( 14239), INT16_C( 20391)),
9087       simde_mm_set_epi16(INT16_C(-19582), INT16_C(-27440), INT16_C( -9450), INT16_C(-25104),
9088                          INT16_C( 11842), INT16_C(  4749), INT16_C(  3094), INT16_C( 19163)),
9089       simde_mm_set_epi16(INT16_C(   -19), INT16_C(  5526), INT16_C(-21173), INT16_C( 16944),
9090                          INT16_C( 12585), INT16_C(-20822), INT16_C( 11145), INT16_C(  1228)) },
9091     { simde_mm_set_epi16(INT16_C(-10118), INT16_C( 25388), INT16_C(-18110), INT16_C( -8312),
9092                          INT16_C(  5249), INT16_C( 27800), INT16_C(  2023), INT16_C(   338)),
9093       simde_mm_set_epi16(INT16_C( 14501), INT16_C( 30804), INT16_C( 26885), INT16_C(-32444),
9094                          INT16_C(-27012), INT16_C(-14925), INT16_C(-31013), INT16_C( 10807)),
9095       simde_mm_set_epi16(INT16_C(-24619), INT16_C( -5416), INT16_C(-32768), INT16_C( 24132),
9096                          INT16_C( 32261), INT16_C( 32767), INT16_C( 32767), INT16_C(-10469)) },
9097     { simde_mm_set_epi16(INT16_C(-17246), INT16_C(-28624), INT16_C( 13423), INT16_C( 27394),
9098                          INT16_C(  7877), INT16_C(-20368), INT16_C(-24205), INT16_C(-15569)),
9099       simde_mm_set_epi16(INT16_C(-21987), INT16_C( -4056), INT16_C(  2917), INT16_C( 23573),
9100                          INT16_C( -2283), INT16_C( 21821), INT16_C( 32369), INT16_C( 26504)),
9101       simde_mm_set_epi16(INT16_C(  4741), INT16_C(-24568), INT16_C( 10506), INT16_C(  3821),
9102                          INT16_C( 10160), INT16_C(-32768), INT16_C(-32768), INT16_C(-32768)) },
9103     { simde_mm_set_epi16(INT16_C(-10290), INT16_C( 29918), INT16_C(-29258), INT16_C(-28749),
9104                          INT16_C(  6048), INT16_C(-25677), INT16_C( 24207), INT16_C(   366)),
9105       simde_mm_set_epi16(INT16_C( 13339), INT16_C(-11229), INT16_C( 23811), INT16_C(  -333),
9106                          INT16_C(-29847), INT16_C( 21714), INT16_C(  2843), INT16_C( -2618)),
9107       simde_mm_set_epi16(INT16_C(-23629), INT16_C( 32767), INT16_C(-32768), INT16_C(-28416),
9108                          INT16_C( 32767), INT16_C(-32768), INT16_C( 21364), INT16_C(  2984)) },
9109     { simde_mm_set_epi16(INT16_C(   824), INT16_C( 19299), INT16_C(-14246), INT16_C(-19942),
9110                          INT16_C( 17549), INT16_C(  5220), INT16_C(-11590), INT16_C(-29570)),
9111       simde_mm_set_epi16(INT16_C( 30144), INT16_C(-11230), INT16_C(-24828), INT16_C( 29586),
9112                          INT16_C( 29999), INT16_C( 25519), INT16_C(  5645), INT16_C( 16976)),
9113       simde_mm_set_epi16(INT16_C(-29320), INT16_C( 30529), INT16_C( 10582), INT16_C(-32768),
9114                          INT16_C(-12450), INT16_C(-20299), INT16_C(-17235), INT16_C(-32768)) }
9115   };
9116 
9117   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9118     simde__m128i r = simde_mm_subs_epi16(test_vec[i].a, test_vec[i].b);
9119     simde_assert_m128i_i16(r, ==, test_vec[i].r);
9120   }
9121 
9122   return 0;
9123 }
9124 
9125 static int
9126 test_simde_mm_subs_epu8(SIMDE_MUNIT_TEST_ARGS) {
9127   const struct {
9128     simde__m128i a;
9129     simde__m128i b;
9130     simde__m128i r;
9131   } test_vec[8] = {
9132     { simde_x_mm_set_epu8( 29,  76, 238,  61, 229, 243, 175, 238,
9133                            75,  27, 166, 154, 166, 157, 121, 248),
9134       simde_x_mm_set_epu8(129,  19, 253, 149,   9, 247,  10, 249,
9135                           105, 205, 179, 225, 124, 146,  91, 221),
9136       simde_x_mm_set_epu8(  0,  57,   0,   0, 220,   0, 165,   0,
9137                             0,   0,   0,   0,  42,  11,  30,  27) },
9138     { simde_x_mm_set_epu8(101, 150, 221,  18, 105, 115, 165,  92,
9139                           211,  64,  38,  72, 139,   6,  65, 201),
9140       simde_x_mm_set_epu8(124, 107, 110,  57, 116, 209, 153,  76,
9141                           122,  56,  60, 234, 120, 132,   4,  95),
9142       simde_x_mm_set_epu8(  0,  43, 111,   0,   0,   0,  12,  16,
9143                            89,   8,   0,   0,  19,   0,  61, 106) },
9144     { simde_x_mm_set_epu8(198, 232, 134,  13, 155, 189, 203,  84,
9145                           209, 255, 163, 211,  57, 177,  19,  86),
9146       simde_x_mm_set_epu8(205,  92, 216, 169, 196, 192,  93, 101,
9147                           208, 230, 232,  36,  70, 151, 125,  72),
9148       simde_x_mm_set_epu8(  0, 140,   0,   0,   0,   0, 110,   0,
9149                             1,  25,   0, 175,   0,  26,   0,  14) },
9150     { simde_x_mm_set_epu8(150, 141, 253,  10, 218, 100, 243,  17,
9151                            87,  99, 224, 222, 198, 181,  26,  41),
9152       simde_x_mm_set_epu8(221, 130, 146,  56,  57, 169,  46,  50,
9153                           234,  43,   8, 172,  95,  74,  51, 101),
9154       simde_x_mm_set_epu8(  0,  11, 107,   0, 161,   0, 197,   0,
9155                             0,  56, 216,  50, 103, 107,   0,   0) },
9156     { simde_x_mm_set_epu8( 91, 188, 127, 216,  55, 208,  83,  14,
9157                           153, 114,  48, 224,  59,  66, 100,  10),
9158       simde_x_mm_set_epu8( 88,  28,  13,  17,  78,  38,   8, 111,
9159                            57,  44, 184,  85, 188, 182, 235, 151),
9160       simde_x_mm_set_epu8(  3, 160, 114, 199,   0, 170,  75,   0,
9161                            96,  70,   0, 139,   0,   0,   0,   0) },
9162     { simde_x_mm_set_epu8(116,  32, 155, 196,  56,  42,  17, 217,
9163                            51, 162,   4,   4, 150,  83,  16, 147),
9164       simde_x_mm_set_epu8(216, 235, 181, 255,  89, 143,  40,  48,
9165                            52,  24, 160,   9, 162, 223, 243, 117),
9166       simde_x_mm_set_epu8(  0,   0,   0,   0,   0,   0,   0, 169,
9167                             0, 138,   0,   0,   0,   0,   0,  30) },
9168     { simde_x_mm_set_epu8(217, 238, 218, 168,  98, 146,  87, 217,
9169                           135, 103, 179, 182, 128,  74, 156,   3),
9170       simde_x_mm_set_epu8(157,   0, 179, 231, 176,  37, 226, 198,
9171                           145, 138, 239, 164,   0, 170,  52,  61),
9172       simde_x_mm_set_epu8( 60, 238,  39,   0,   0, 109,   0,  19,
9173                             0,   0,   0,  18, 128,   0, 104,   0) },
9174     { simde_x_mm_set_epu8(181,  83, 160, 141,  77, 119, 160, 171,
9175                           112,  95,  47,  88,   0,  90, 237,  18),
9176       simde_x_mm_set_epu8(139, 146,  25, 173,  34,  31, 251, 200,
9177                           190, 131,  23,  41, 246,  91,  98, 221),
9178       simde_x_mm_set_epu8( 42,   0, 135,   0,  43,  88,   0,   0,
9179                             0,   0,  24,  47,   0,   0, 139,   0) }
9180   };
9181 
9182   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9183     simde__m128i r = simde_mm_subs_epu8(test_vec[i].a, test_vec[i].b);
9184     simde_assert_m128i_u8(r, ==, test_vec[i].r);
9185   }
9186 
9187   return 0;
9188 }
9189 
9190 static int
9191 test_simde_mm_subs_epu16(SIMDE_MUNIT_TEST_ARGS) {
9192   const struct {
9193     simde__m128i a;
9194     simde__m128i b;
9195     simde__m128i r;
9196   } test_vec[8] = {
9197     { simde_x_mm_set_epu16(55440, 59202, 42058, 53369, 32796,  7917, 33818, 17136),
9198       simde_x_mm_set_epu16(26104, 52689, 47050, 39249, 59785, 38246, 31610, 10518),
9199       simde_x_mm_set_epu16(29336,  6513,     0, 14120,     0,     0,  2208,  6618) },
9200     { simde_x_mm_set_epu16(34216, 34652, 60066, 36214,  4826, 65416, 55052, 33573),
9201       simde_x_mm_set_epu16(26443, 15803,  4000, 33420, 50076, 27556,  5522, 41665),
9202       simde_x_mm_set_epu16( 7773, 18849, 56066,  2794,     0, 37860, 49530,     0) },
9203     { simde_x_mm_set_epu16(64499, 21603, 35445, 16287, 15728, 23400, 23336, 39270),
9204       simde_x_mm_set_epu16(56255, 54924, 45249, 41636, 27152, 13319, 19428,   768),
9205       simde_x_mm_set_epu16( 8244,     0,     0,     0,     0, 10081,  3908, 38502) },
9206     { simde_x_mm_set_epu16( 1242, 22793, 21812, 57045, 22651, 26751, 59072, 30159),
9207       simde_x_mm_set_epu16(11521, 44413, 36849,   788, 57441, 54148,  2979, 46303),
9208       simde_x_mm_set_epu16(    0,     0,     0, 56257,     0,     0, 56093,     0) },
9209     { simde_x_mm_set_epu16(37620, 40488, 64998, 40075, 44204, 34122, 59592, 65445),
9210       simde_x_mm_set_epu16(40351, 64891, 27393, 62063,  1981, 56033, 30691, 62006),
9211       simde_x_mm_set_epu16(    0,     0, 37605,     0, 42223,     0, 28901,  3439) },
9212     { simde_x_mm_set_epu16(65230, 30209, 16765,  1470, 31101, 49860, 26882, 55440),
9213       simde_x_mm_set_epu16(49049, 44537, 10442, 42049,   271, 49034, 11746,  5994),
9214       simde_x_mm_set_epu16(16181,     0,  6323,     0, 30830,   826, 15136, 49446) },
9215     { simde_x_mm_set_epu16(37013,  9547, 22144, 27612, 32177, 62691, 50927, 50782),
9216       simde_x_mm_set_epu16(18153,  2530, 10375, 48140,  7056, 62459, 20700, 31971),
9217       simde_x_mm_set_epu16(18860,  7017, 11769,     0, 25121,   232, 30227, 18811) },
9218     { simde_x_mm_set_epu16( 9831, 28967, 28080, 17470, 59616, 18625, 64250, 31724),
9219       simde_x_mm_set_epu16(52094, 35298, 55420,  3659, 42707, 55727, 29250, 17787),
9220       simde_x_mm_set_epu16(    0,     0,     0, 13811, 16909,     0, 35000, 13937) }
9221   };
9222 
9223   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9224     simde__m128i r = simde_mm_subs_epu16(test_vec[i].a, test_vec[i].b);
9225     simde_assert_m128i_u16(r, ==, test_vec[i].r);
9226   }
9227 
9228   return 0;
9229 }
9230 
9231 static int
9232 test_simde_mm_ucomieq_sd(SIMDE_MUNIT_TEST_ARGS) {
9233   const struct {
9234     simde__m128d a;
9235     simde__m128d b;
9236     int r;
9237   } test_vec[8] = {
9238     { simde_mm_set_pd(SIMDE_FLOAT64_C(  523.45), SIMDE_FLOAT64_C( -718.90)),
9239       simde_mm_set_pd(SIMDE_FLOAT64_C(   39.72), SIMDE_FLOAT64_C(  184.39)),
9240       0 },
9241     { simde_mm_set_pd(SIMDE_FLOAT64_C(  666.01), SIMDE_FLOAT64_C( -592.10)),
9242       simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)),
9243       1 },
9244     { simde_mm_set_pd(SIMDE_FLOAT64_C(  840.01), SIMDE_FLOAT64_C( -550.36)),
9245       simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)),
9246       0 },
9247     { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C(  791.25)),
9248       simde_mm_set_pd(SIMDE_FLOAT64_C(  791.25), SIMDE_FLOAT64_C(  791.25)),
9249       1 },
9250     { simde_mm_set_pd(SIMDE_FLOAT64_C(  743.24), SIMDE_FLOAT64_C(  945.47)),
9251       simde_mm_set_pd(SIMDE_FLOAT64_C(  945.47), SIMDE_FLOAT64_C(  844.58)),
9252       0 },
9253     { simde_mm_set_pd(SIMDE_FLOAT64_C(  938.39), SIMDE_FLOAT64_C( -590.62)),
9254       simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)),
9255       0 },
9256     { simde_mm_set_pd(SIMDE_FLOAT64_C(  876.49), SIMDE_FLOAT64_C(  503.26)),
9257       simde_mm_set_pd(SIMDE_FLOAT64_C(  503.26), SIMDE_FLOAT64_C(  503.26)),
9258       1 },
9259     { simde_mm_set_pd(SIMDE_FLOAT64_C(  927.98), SIMDE_FLOAT64_C( -197.60)),
9260       simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)),
9261       1 }
9262   };
9263 
9264   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9265     int r = simde_mm_ucomieq_sd(test_vec[i].a, test_vec[i].b);
9266     simde_assert_equal_i(r, test_vec[i].r);
9267   }
9268 
9269   return 0;
9270 }
9271 
9272 static int
9273 test_simde_mm_ucomige_sd(SIMDE_MUNIT_TEST_ARGS) {
9274   const struct {
9275     simde__m128d a;
9276     simde__m128d b;
9277     int r;
9278   } test_vec[8] = {
9279     { simde_mm_set_pd(SIMDE_FLOAT64_C(  214.53), SIMDE_FLOAT64_C(  606.90)),
9280       simde_mm_set_pd(SIMDE_FLOAT64_C(  814.33), SIMDE_FLOAT64_C(  606.90)),
9281       1 },
9282     { simde_mm_set_pd(SIMDE_FLOAT64_C( -487.58), SIMDE_FLOAT64_C(  444.56)),
9283       simde_mm_set_pd(SIMDE_FLOAT64_C( -781.36), SIMDE_FLOAT64_C(   30.46)),
9284       1 },
9285     { simde_mm_set_pd(SIMDE_FLOAT64_C(  605.28), SIMDE_FLOAT64_C( -943.32)),
9286       simde_mm_set_pd(SIMDE_FLOAT64_C( -943.32), SIMDE_FLOAT64_C( -943.32)),
9287       1 },
9288     { simde_mm_set_pd(SIMDE_FLOAT64_C( -981.47), SIMDE_FLOAT64_C(   31.75)),
9289       simde_mm_set_pd(SIMDE_FLOAT64_C(   31.75), SIMDE_FLOAT64_C(  299.12)),
9290       0 },
9291     { simde_mm_set_pd(SIMDE_FLOAT64_C(  480.83), SIMDE_FLOAT64_C(  255.57)),
9292       simde_mm_set_pd(SIMDE_FLOAT64_C(  946.90), SIMDE_FLOAT64_C(  608.16)),
9293       0 },
9294     { simde_mm_set_pd(SIMDE_FLOAT64_C(  634.58), SIMDE_FLOAT64_C(  320.38)),
9295       simde_mm_set_pd(SIMDE_FLOAT64_C(  320.38), SIMDE_FLOAT64_C(  942.24)),
9296       0 },
9297     { simde_mm_set_pd(SIMDE_FLOAT64_C(   98.67), SIMDE_FLOAT64_C(  118.05)),
9298       simde_mm_set_pd(SIMDE_FLOAT64_C(  118.05), SIMDE_FLOAT64_C(  118.05)),
9299       1 },
9300     { simde_mm_set_pd(SIMDE_FLOAT64_C(  544.57), SIMDE_FLOAT64_C(  783.14)),
9301       simde_mm_set_pd(SIMDE_FLOAT64_C(  636.80), SIMDE_FLOAT64_C(  783.14)),
9302       1 }
9303   };
9304 
9305   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9306     int r = simde_mm_ucomige_sd(test_vec[i].a, test_vec[i].b);
9307     simde_assert_equal_i(r, test_vec[i].r);
9308   }
9309 
9310   return 0;
9311 }
9312 
9313 static int
9314 test_simde_mm_ucomigt_sd(SIMDE_MUNIT_TEST_ARGS) {
9315   const struct {
9316     simde__m128d a;
9317     simde__m128d b;
9318     int r;
9319   } test_vec[8] = {
9320     { simde_mm_set_pd(SIMDE_FLOAT64_C(  523.45), SIMDE_FLOAT64_C( -718.90)),
9321       simde_mm_set_pd(SIMDE_FLOAT64_C(   39.72), SIMDE_FLOAT64_C(  184.39)),
9322       0 },
9323     { simde_mm_set_pd(SIMDE_FLOAT64_C(  666.01), SIMDE_FLOAT64_C( -592.10)),
9324       simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)),
9325       0 },
9326     { simde_mm_set_pd(SIMDE_FLOAT64_C(  840.01), SIMDE_FLOAT64_C( -550.36)),
9327       simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)),
9328       1 },
9329     { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C(  791.25)),
9330       simde_mm_set_pd(SIMDE_FLOAT64_C(  791.25), SIMDE_FLOAT64_C(  791.25)),
9331       0 },
9332     { simde_mm_set_pd(SIMDE_FLOAT64_C(  743.24), SIMDE_FLOAT64_C(  945.47)),
9333       simde_mm_set_pd(SIMDE_FLOAT64_C(  945.47), SIMDE_FLOAT64_C(  844.58)),
9334       1 },
9335     { simde_mm_set_pd(SIMDE_FLOAT64_C(  938.39), SIMDE_FLOAT64_C( -590.62)),
9336       simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)),
9337       0 },
9338     { simde_mm_set_pd(SIMDE_FLOAT64_C(  876.49), SIMDE_FLOAT64_C(  503.26)),
9339       simde_mm_set_pd(SIMDE_FLOAT64_C(  503.26), SIMDE_FLOAT64_C(  503.26)),
9340       0 },
9341     { simde_mm_set_pd(SIMDE_FLOAT64_C(  927.98), SIMDE_FLOAT64_C( -197.60)),
9342       simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)),
9343       0 }
9344   };
9345 
9346   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9347     int r = simde_mm_ucomigt_sd(test_vec[i].a, test_vec[i].b);
9348     simde_assert_equal_i(r, test_vec[i].r);
9349   }
9350 
9351   return 0;
9352 }
9353 
9354 static int
9355 test_simde_mm_ucomile_sd(SIMDE_MUNIT_TEST_ARGS) {
9356   const struct {
9357     simde__m128d a;
9358     simde__m128d b;
9359     int r;
9360   } test_vec[8] = {
9361     { simde_mm_set_pd(SIMDE_FLOAT64_C(  523.45), SIMDE_FLOAT64_C( -718.90)),
9362       simde_mm_set_pd(SIMDE_FLOAT64_C(   39.72), SIMDE_FLOAT64_C(  184.39)),
9363       1 },
9364     { simde_mm_set_pd(SIMDE_FLOAT64_C(  666.01), SIMDE_FLOAT64_C( -592.10)),
9365       simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)),
9366       1 },
9367     { simde_mm_set_pd(SIMDE_FLOAT64_C(  840.01), SIMDE_FLOAT64_C( -550.36)),
9368       simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)),
9369       0 },
9370     { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C(  791.25)),
9371       simde_mm_set_pd(SIMDE_FLOAT64_C(  791.25), SIMDE_FLOAT64_C(  791.25)),
9372       1 },
9373     { simde_mm_set_pd(SIMDE_FLOAT64_C(  743.24), SIMDE_FLOAT64_C(  945.47)),
9374       simde_mm_set_pd(SIMDE_FLOAT64_C(  945.47), SIMDE_FLOAT64_C(  844.58)),
9375       0 },
9376     { simde_mm_set_pd(SIMDE_FLOAT64_C(  938.39), SIMDE_FLOAT64_C( -590.62)),
9377       simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)),
9378       1 },
9379     { simde_mm_set_pd(SIMDE_FLOAT64_C(  876.49), SIMDE_FLOAT64_C(  503.26)),
9380       simde_mm_set_pd(SIMDE_FLOAT64_C(  503.26), SIMDE_FLOAT64_C(  503.26)),
9381       1 },
9382     { simde_mm_set_pd(SIMDE_FLOAT64_C(  927.98), SIMDE_FLOAT64_C( -197.60)),
9383       simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)),
9384       1 }
9385   };
9386 
9387   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9388     int r = simde_mm_ucomile_sd(test_vec[i].a, test_vec[i].b);
9389     simde_assert_equal_i(r, test_vec[i].r);
9390   }
9391 
9392   return 0;
9393 }
9394 
9395 static int
9396 test_simde_mm_ucomilt_sd(SIMDE_MUNIT_TEST_ARGS) {
9397   const struct {
9398     simde__m128d a;
9399     simde__m128d b;
9400     int r;
9401   } test_vec[8] = {
9402     { simde_mm_set_pd(SIMDE_FLOAT64_C(  523.45), SIMDE_FLOAT64_C( -718.90)),
9403       simde_mm_set_pd(SIMDE_FLOAT64_C(   39.72), SIMDE_FLOAT64_C(  184.39)),
9404       1 },
9405     { simde_mm_set_pd(SIMDE_FLOAT64_C(  666.01), SIMDE_FLOAT64_C( -592.10)),
9406       simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)),
9407       0 },
9408     { simde_mm_set_pd(SIMDE_FLOAT64_C(  840.01), SIMDE_FLOAT64_C( -550.36)),
9409       simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)),
9410       0 },
9411     { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C(  791.25)),
9412       simde_mm_set_pd(SIMDE_FLOAT64_C(  791.25), SIMDE_FLOAT64_C(  791.25)),
9413       0 },
9414     { simde_mm_set_pd(SIMDE_FLOAT64_C(  743.24), SIMDE_FLOAT64_C(  945.47)),
9415       simde_mm_set_pd(SIMDE_FLOAT64_C(  945.47), SIMDE_FLOAT64_C(  844.58)),
9416       0 },
9417     { simde_mm_set_pd(SIMDE_FLOAT64_C(  938.39), SIMDE_FLOAT64_C( -590.62)),
9418       simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)),
9419       1 },
9420     { simde_mm_set_pd(SIMDE_FLOAT64_C(  876.49), SIMDE_FLOAT64_C(  503.26)),
9421       simde_mm_set_pd(SIMDE_FLOAT64_C(  503.26), SIMDE_FLOAT64_C(  503.26)),
9422       0 },
9423     { simde_mm_set_pd(SIMDE_FLOAT64_C(  927.98), SIMDE_FLOAT64_C( -197.60)),
9424       simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)),
9425       0 }
9426   };
9427 
9428   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9429     int r = simde_mm_ucomilt_sd(test_vec[i].a, test_vec[i].b);
9430     simde_assert_equal_i(r, test_vec[i].r);
9431   }
9432 
9433   return 0;
9434 }
9435 
9436 static int
9437 test_simde_mm_undefined_pd(SIMDE_MUNIT_TEST_ARGS) {
9438   simde__m128d z = simde_mm_setzero_pd();
9439   simde__m128d v = simde_mm_undefined_pd();
9440   v = simde_mm_xor_pd(v, v);
9441 
9442   simde_assert_m128d_equal(v, z);
9443 
9444   return 0;
9445 }
9446 
9447 static int
9448 test_simde_mm_undefined_si128(SIMDE_MUNIT_TEST_ARGS) {
9449   simde__m128i z = simde_mm_setzero_si128();
9450   simde__m128i v = simde_mm_undefined_si128();
9451   v = simde_mm_xor_si128(v, v);
9452 
9453   simde_assert_m128i_equal(v, z);
9454 
9455   return 0;
9456 }
9457 
9458 
9459 static int
9460 test_simde_mm_ucomineq_sd(SIMDE_MUNIT_TEST_ARGS) {
9461   const struct {
9462     simde__m128d a;
9463     simde__m128d b;
9464     int r;
9465   } test_vec[8] = {
9466     { simde_mm_set_pd(SIMDE_FLOAT64_C(  523.45), SIMDE_FLOAT64_C( -718.90)),
9467       simde_mm_set_pd(SIMDE_FLOAT64_C(   39.72), SIMDE_FLOAT64_C(  184.39)),
9468       1 },
9469     { simde_mm_set_pd(SIMDE_FLOAT64_C(  666.01), SIMDE_FLOAT64_C( -592.10)),
9470       simde_mm_set_pd(SIMDE_FLOAT64_C( -592.10), SIMDE_FLOAT64_C( -592.10)),
9471       0 },
9472     { simde_mm_set_pd(SIMDE_FLOAT64_C(  840.01), SIMDE_FLOAT64_C( -550.36)),
9473       simde_mm_set_pd(SIMDE_FLOAT64_C( -550.36), SIMDE_FLOAT64_C( -701.38)),
9474       1 },
9475     { simde_mm_set_pd(SIMDE_FLOAT64_C( -236.99), SIMDE_FLOAT64_C(  791.25)),
9476       simde_mm_set_pd(SIMDE_FLOAT64_C(  791.25), SIMDE_FLOAT64_C(  791.25)),
9477       0 },
9478     { simde_mm_set_pd(SIMDE_FLOAT64_C(  743.24), SIMDE_FLOAT64_C(  945.47)),
9479       simde_mm_set_pd(SIMDE_FLOAT64_C(  945.47), SIMDE_FLOAT64_C(  844.58)),
9480       1 },
9481     { simde_mm_set_pd(SIMDE_FLOAT64_C(  938.39), SIMDE_FLOAT64_C( -590.62)),
9482       simde_mm_set_pd(SIMDE_FLOAT64_C( -590.62), SIMDE_FLOAT64_C( -183.26)),
9483       1 },
9484     { simde_mm_set_pd(SIMDE_FLOAT64_C(  876.49), SIMDE_FLOAT64_C(  503.26)),
9485       simde_mm_set_pd(SIMDE_FLOAT64_C(  503.26), SIMDE_FLOAT64_C(  503.26)),
9486       0 },
9487     { simde_mm_set_pd(SIMDE_FLOAT64_C(  927.98), SIMDE_FLOAT64_C( -197.60)),
9488       simde_mm_set_pd(SIMDE_FLOAT64_C( -197.60), SIMDE_FLOAT64_C( -197.60)),
9489       0 }
9490   };
9491 
9492   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9493     int r = simde_mm_ucomineq_sd(test_vec[i].a, test_vec[i].b);
9494     simde_assert_equal_i(r, test_vec[i].r);
9495   }
9496 
9497   return 0;
9498 }
9499 
9500 static int
9501 test_simde_mm_unpackhi_epi8(SIMDE_MUNIT_TEST_ARGS) {
9502   const struct {
9503     simde__m128i a;
9504     simde__m128i b;
9505     simde__m128i r;
9506   } test_vec[8] = {
9507     { simde_mm_set_epi8(INT8_C( -36), INT8_C(  95), INT8_C(  84), INT8_C(-106), INT8_C(  32), INT8_C( 120), INT8_C(  19), INT8_C( -16),
9508                         INT8_C(  46), INT8_C(-123), INT8_C(-117), INT8_C(  46), INT8_C(  18), INT8_C( -72), INT8_C( -36), INT8_C(  78)),
9509       simde_mm_set_epi8(INT8_C(  10), INT8_C(-106), INT8_C( -91), INT8_C( -62), INT8_C( -61), INT8_C( -62), INT8_C( -83), INT8_C( -45),
9510                         INT8_C( -36), INT8_C(  17), INT8_C(  58), INT8_C(-114), INT8_C(  96), INT8_C(-102), INT8_C( -89), INT8_C( -20)),
9511       simde_mm_set_epi8(INT8_C(  10), INT8_C( -36), INT8_C(-106), INT8_C(  95), INT8_C( -91), INT8_C(  84), INT8_C( -62), INT8_C(-106),
9512                         INT8_C( -61), INT8_C(  32), INT8_C( -62), INT8_C( 120), INT8_C( -83), INT8_C(  19), INT8_C( -45), INT8_C( -16)) },
9513     { simde_mm_set_epi8(INT8_C( -54), INT8_C(-123), INT8_C(  74), INT8_C(  62), INT8_C(  43), INT8_C(  85), INT8_C( -99), INT8_C( -95),
9514                         INT8_C( -93), INT8_C( -92), INT8_C( 121), INT8_C(  82), INT8_C(  61), INT8_C(-110), INT8_C(-111), INT8_C( -40)),
9515       simde_mm_set_epi8(INT8_C(  61), INT8_C( -50), INT8_C(  88), INT8_C( -56), INT8_C(  14), INT8_C( -92), INT8_C(-109), INT8_C( -80),
9516                         INT8_C( -22), INT8_C( -61), INT8_C(-108), INT8_C(  69), INT8_C( -82), INT8_C(  29), INT8_C(  38), INT8_C( -72)),
9517       simde_mm_set_epi8(INT8_C(  61), INT8_C( -54), INT8_C( -50), INT8_C(-123), INT8_C(  88), INT8_C(  74), INT8_C( -56), INT8_C(  62),
9518                         INT8_C(  14), INT8_C(  43), INT8_C( -92), INT8_C(  85), INT8_C(-109), INT8_C( -99), INT8_C( -80), INT8_C( -95)) },
9519     { simde_mm_set_epi8(INT8_C(-103), INT8_C( -78), INT8_C( -94), INT8_C( -12), INT8_C( -31), INT8_C( -92), INT8_C( -17), INT8_C(  16),
9520                         INT8_C(-122), INT8_C( 113), INT8_C( -48), INT8_C( -99), INT8_C(  32), INT8_C( -67), INT8_C( 124), INT8_C( 107)),
9521       simde_mm_set_epi8(INT8_C(  42), INT8_C(  65), INT8_C( -45), INT8_C( -19), INT8_C( -55), INT8_C( -49), INT8_C( -54), INT8_C(  56),
9522                         INT8_C( -67), INT8_C( -54), INT8_C(-109), INT8_C( -80), INT8_C( -85), INT8_C(  96), INT8_C( -36), INT8_C( -69)),
9523       simde_mm_set_epi8(INT8_C(  42), INT8_C(-103), INT8_C(  65), INT8_C( -78), INT8_C( -45), INT8_C( -94), INT8_C( -19), INT8_C( -12),
9524                         INT8_C( -55), INT8_C( -31), INT8_C( -49), INT8_C( -92), INT8_C( -54), INT8_C( -17), INT8_C(  56), INT8_C(  16)) },
9525     { simde_mm_set_epi8(INT8_C( -33), INT8_C(  -6), INT8_C( -31), INT8_C( -33), INT8_C( -45), INT8_C( -71), INT8_C( 119), INT8_C(  79),
9526                         INT8_C(  29), INT8_C(   8), INT8_C( -44), INT8_C( -42), INT8_C( 113), INT8_C( -23), INT8_C(  53), INT8_C(-118)),
9527       simde_mm_set_epi8(INT8_C(  -4), INT8_C( -47), INT8_C( -67), INT8_C(  41), INT8_C(  84), INT8_C(   5), INT8_C( -24), INT8_C( 123),
9528                         INT8_C( 102), INT8_C( -69), INT8_C(  66), INT8_C( 117), INT8_C(-128), INT8_C( 115), INT8_C(  -2), INT8_C( -19)),
9529       simde_mm_set_epi8(INT8_C(  -4), INT8_C( -33), INT8_C( -47), INT8_C(  -6), INT8_C( -67), INT8_C( -31), INT8_C(  41), INT8_C( -33),
9530                         INT8_C(  84), INT8_C( -45), INT8_C(   5), INT8_C( -71), INT8_C( -24), INT8_C( 119), INT8_C( 123), INT8_C(  79)) },
9531     { simde_mm_set_epi8(INT8_C(-100), INT8_C( -57), INT8_C(  -5), INT8_C(-111), INT8_C( 124), INT8_C(-127), INT8_C( -90), INT8_C( -88),
9532                         INT8_C(  23), INT8_C(-114), INT8_C( -41), INT8_C( -98), INT8_C(  73), INT8_C(  14), INT8_C(   5), INT8_C(  46)),
9533       simde_mm_set_epi8(INT8_C(  66), INT8_C(-115), INT8_C( -36), INT8_C( -25), INT8_C( -75), INT8_C(-124), INT8_C(  96), INT8_C(  16),
9534                         INT8_C(  14), INT8_C( 103), INT8_C( -98), INT8_C(-105), INT8_C( -21), INT8_C( -89), INT8_C( -87), INT8_C( -43)),
9535       simde_mm_set_epi8(INT8_C(  66), INT8_C(-100), INT8_C(-115), INT8_C( -57), INT8_C( -36), INT8_C(  -5), INT8_C( -25), INT8_C(-111),
9536                         INT8_C( -75), INT8_C( 124), INT8_C(-124), INT8_C(-127), INT8_C(  96), INT8_C( -90), INT8_C(  16), INT8_C( -88)) },
9537     { simde_mm_set_epi8(INT8_C( -66), INT8_C( -23), INT8_C( -71), INT8_C( 103), INT8_C(  67), INT8_C( -33), INT8_C(-118), INT8_C( -19),
9538                         INT8_C(  25), INT8_C( -53), INT8_C(  56), INT8_C(  16), INT8_C(-126), INT8_C( 121), INT8_C(  96), INT8_C(-121)),
9539       simde_mm_set_epi8(INT8_C( -16), INT8_C(  18), INT8_C(  55), INT8_C(-104), INT8_C(-120), INT8_C(  39), INT8_C( -14), INT8_C(  76),
9540                         INT8_C(  39), INT8_C(  41), INT8_C( -81), INT8_C(  -9), INT8_C( -56), INT8_C(-103), INT8_C(   3), INT8_C( -27)),
9541       simde_mm_set_epi8(INT8_C( -16), INT8_C( -66), INT8_C(  18), INT8_C( -23), INT8_C(  55), INT8_C( -71), INT8_C(-104), INT8_C( 103),
9542                         INT8_C(-120), INT8_C(  67), INT8_C(  39), INT8_C( -33), INT8_C( -14), INT8_C(-118), INT8_C(  76), INT8_C( -19)) },
9543     { simde_mm_set_epi8(INT8_C( 114), INT8_C( -36), INT8_C(  60), INT8_C( -26), INT8_C(  24), INT8_C( -63), INT8_C( -29), INT8_C( 114),
9544                         INT8_C(  74), INT8_C( -94), INT8_C(  33), INT8_C( -33), INT8_C(  38), INT8_C( 109), INT8_C(  31), INT8_C( -91)),
9545       simde_mm_set_epi8(INT8_C( -28), INT8_C( -92), INT8_C(  30), INT8_C(-101), INT8_C(  -7), INT8_C(   1), INT8_C(-108), INT8_C(  29),
9546                         INT8_C( 114), INT8_C(  44), INT8_C(  -8), INT8_C(-107), INT8_C( -68), INT8_C(  90), INT8_C( 100), INT8_C( -37)),
9547       simde_mm_set_epi8(INT8_C( -28), INT8_C( 114), INT8_C( -92), INT8_C( -36), INT8_C(  30), INT8_C(  60), INT8_C(-101), INT8_C( -26),
9548                         INT8_C(  -7), INT8_C(  24), INT8_C(   1), INT8_C( -63), INT8_C(-108), INT8_C( -29), INT8_C(  29), INT8_C( 114)) },
9549     { simde_mm_set_epi8(INT8_C(  83), INT8_C( -32), INT8_C( -17), INT8_C( -35), INT8_C(  52), INT8_C( -64), INT8_C(  46), INT8_C(  89),
9550                         INT8_C( -65), INT8_C( -27), INT8_C(-104), INT8_C(   5), INT8_C(  84), INT8_C(  41), INT8_C(  88), INT8_C(  34)),
9551       simde_mm_set_epi8(INT8_C( -95), INT8_C(  93), INT8_C(-118), INT8_C( -44), INT8_C(  65), INT8_C( 114), INT8_C(  28), INT8_C( -90),
9552                         INT8_C( -85), INT8_C( 102), INT8_C(  78), INT8_C( -99), INT8_C(-120), INT8_C(  43), INT8_C( -56), INT8_C(  25)),
9553       simde_mm_set_epi8(INT8_C( -95), INT8_C(  83), INT8_C(  93), INT8_C( -32), INT8_C(-118), INT8_C( -17), INT8_C( -44), INT8_C( -35),
9554                         INT8_C(  65), INT8_C(  52), INT8_C( 114), INT8_C( -64), INT8_C(  28), INT8_C(  46), INT8_C( -90), INT8_C(  89)) }
9555   };
9556 
9557   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9558     simde__m128i r = simde_mm_unpackhi_epi8(test_vec[i].a, test_vec[i].b);
9559     simde_assert_m128i_i8(r, ==, test_vec[i].r);
9560   }
9561 
9562   return 0;
9563 }
9564 
9565 static int
9566 test_simde_mm_unpackhi_epi16(SIMDE_MUNIT_TEST_ARGS) {
9567   const struct {
9568     simde__m128i a;
9569     simde__m128i b;
9570     simde__m128i r;
9571   } test_vec[8] = {
9572     { simde_mm_set_epi16(INT16_C( 18787), INT16_C( 30957), INT16_C(  6745), INT16_C(  5288),
9573                          INT16_C(-10333), INT16_C( 29461), INT16_C(   961), INT16_C(-14007)),
9574       simde_mm_set_epi16(INT16_C(-29691), INT16_C( 32561), INT16_C(-16442), INT16_C( -4659),
9575                          INT16_C( 21222), INT16_C(-21527), INT16_C( 30610), INT16_C( 14168)),
9576       simde_mm_set_epi16(INT16_C(-29691), INT16_C( 18787), INT16_C( 32561), INT16_C( 30957),
9577                          INT16_C(-16442), INT16_C(  6745), INT16_C( -4659), INT16_C(  5288)) },
9578     { simde_mm_set_epi16(INT16_C( 14241), INT16_C(-17353), INT16_C( 15871), INT16_C(  3653),
9579                          INT16_C(-29200), INT16_C( -9979), INT16_C(-30607), INT16_C( 31741)),
9580       simde_mm_set_epi16(INT16_C( 16753), INT16_C( 10981), INT16_C( 24190), INT16_C( 25811),
9581                          INT16_C(  6793), INT16_C( -6051), INT16_C(  1979), INT16_C(-14675)),
9582       simde_mm_set_epi16(INT16_C( 16753), INT16_C( 14241), INT16_C( 10981), INT16_C(-17353),
9583                          INT16_C( 24190), INT16_C( 15871), INT16_C( 25811), INT16_C(  3653)) },
9584     { simde_mm_set_epi16(INT16_C( 24118), INT16_C( -7950), INT16_C(  8813), INT16_C( 23815),
9585                          INT16_C(-12880), INT16_C( 22441), INT16_C(-31736), INT16_C( 28417)),
9586       simde_mm_set_epi16(INT16_C( -2535), INT16_C(-21518), INT16_C( 10955), INT16_C(-16484),
9587                          INT16_C(-17119), INT16_C(  5667), INT16_C(  5018), INT16_C( -9313)),
9588       simde_mm_set_epi16(INT16_C( -2535), INT16_C( 24118), INT16_C(-21518), INT16_C( -7950),
9589                          INT16_C( 10955), INT16_C(  8813), INT16_C(-16484), INT16_C( 23815)) },
9590     { simde_mm_set_epi16(INT16_C(-15717), INT16_C(  7765), INT16_C(-27156), INT16_C( 26721),
9591                          INT16_C( -2021), INT16_C( -7166), INT16_C(   832), INT16_C(  3368)),
9592       simde_mm_set_epi16(INT16_C(-17604), INT16_C( -2433), INT16_C(-22343), INT16_C( -9047),
9593                          INT16_C( -8009), INT16_C(-14884), INT16_C(-31015), INT16_C(  9072)),
9594       simde_mm_set_epi16(INT16_C(-17604), INT16_C(-15717), INT16_C( -2433), INT16_C(  7765),
9595                          INT16_C(-22343), INT16_C(-27156), INT16_C( -9047), INT16_C( 26721)) },
9596     { simde_mm_set_epi16(INT16_C(  9613), INT16_C(-25734), INT16_C(-29111), INT16_C( -6271),
9597                          INT16_C( 28183), INT16_C(  5627), INT16_C( 23471), INT16_C(-31640)),
9598       simde_mm_set_epi16(INT16_C( 17448), INT16_C(-17387), INT16_C( 12535), INT16_C( 19499),
9599                          INT16_C( 11772), INT16_C(  2463), INT16_C( 20494), INT16_C( -6320)),
9600       simde_mm_set_epi16(INT16_C( 17448), INT16_C(  9613), INT16_C(-17387), INT16_C(-25734),
9601                          INT16_C( 12535), INT16_C(-29111), INT16_C( 19499), INT16_C( -6271)) },
9602     { simde_mm_set_epi16(INT16_C(-23597), INT16_C(-19655), INT16_C(-17057), INT16_C( 18059),
9603                          INT16_C(  9484), INT16_C(  5905), INT16_C( 26068), INT16_C(  7424)),
9604       simde_mm_set_epi16(INT16_C(-16983), INT16_C( -3720), INT16_C(-18613), INT16_C(  7615),
9605                          INT16_C(-29369), INT16_C(-17019), INT16_C(   736), INT16_C( 23842)),
9606       simde_mm_set_epi16(INT16_C(-16983), INT16_C(-23597), INT16_C( -3720), INT16_C(-19655),
9607                          INT16_C(-18613), INT16_C(-17057), INT16_C(  7615), INT16_C( 18059)) },
9608     { simde_mm_set_epi16(INT16_C( 10339), INT16_C(  5875), INT16_C(-28772), INT16_C(  4220),
9609                          INT16_C( 31801), INT16_C( 29049), INT16_C( 31270), INT16_C(-18878)),
9610       simde_mm_set_epi16(INT16_C(-18888), INT16_C( 24242), INT16_C(-31726), INT16_C(-29025),
9611                          INT16_C(   845), INT16_C( -8031), INT16_C(  4992), INT16_C( -3599)),
9612       simde_mm_set_epi16(INT16_C(-18888), INT16_C( 10339), INT16_C( 24242), INT16_C(  5875),
9613                          INT16_C(-31726), INT16_C(-28772), INT16_C(-29025), INT16_C(  4220)) },
9614     { simde_mm_set_epi16(INT16_C(-14097), INT16_C( 31063), INT16_C(-25063), INT16_C( 16951),
9615                          INT16_C(-20725), INT16_C(  5387), INT16_C( -3219), INT16_C(-20465)),
9616       simde_mm_set_epi16(INT16_C(-23465), INT16_C(-30434), INT16_C( 28479), INT16_C(-15276),
9617                          INT16_C(-28694), INT16_C( -9228), INT16_C( 22420), INT16_C(-31453)),
9618       simde_mm_set_epi16(INT16_C(-23465), INT16_C(-14097), INT16_C(-30434), INT16_C( 31063),
9619                          INT16_C( 28479), INT16_C(-25063), INT16_C(-15276), INT16_C( 16951)) }
9620   };
9621 
9622   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9623     simde__m128i r = simde_mm_unpackhi_epi16(test_vec[i].a, test_vec[i].b);
9624     simde_assert_m128i_i16(r, ==, test_vec[i].r);
9625   }
9626 
9627   return 0;
9628 }
9629 
9630 static int
9631 test_simde_mm_unpackhi_epi32(SIMDE_MUNIT_TEST_ARGS) {
9632   const struct {
9633     simde__m128i a;
9634     simde__m128i b;
9635     simde__m128i r;
9636   } test_vec[8] = {
9637     { simde_mm_set_epi32(INT32_C(  168291084), INT32_C(  803222516), INT32_C(-2059191165), INT32_C(  156619127)),
9638       simde_mm_set_epi32(INT32_C( 1247164255), INT32_C(-1585504202), INT32_C(   81979034), INT32_C(-1257437380)),
9639       simde_mm_set_epi32(INT32_C( 1247164255), INT32_C(  168291084), INT32_C(-1585504202), INT32_C(  803222516)) },
9640     { simde_mm_set_epi32(INT32_C(-1229392695), INT32_C( -447420261), INT32_C(  -26173961), INT32_C( 1549193795)),
9641       simde_mm_set_epi32(INT32_C(-1584985518), INT32_C(-1825626458), INT32_C( 1790250510), INT32_C( -280669042)),
9642       simde_mm_set_epi32(INT32_C(-1584985518), INT32_C(-1229392695), INT32_C(-1825626458), INT32_C( -447420261)) },
9643     { simde_mm_set_epi32(INT32_C( -648698663), INT32_C( 1485053046), INT32_C(-2125470397), INT32_C(  507664294)),
9644       simde_mm_set_epi32(INT32_C( -735759218), INT32_C( -710175418), INT32_C(-1695159870), INT32_C(-1167064304)),
9645       simde_mm_set_epi32(INT32_C( -735759218), INT32_C( -648698663), INT32_C( -710175418), INT32_C( 1485053046)) },
9646     { simde_mm_set_epi32(INT32_C( -103259786), INT32_C( -188357300), INT32_C(  452180145), INT32_C(-1396420115)),
9647       simde_mm_set_epi32(INT32_C( 1404727965), INT32_C( -804737565), INT32_C(-1054802326), INT32_C( 1642647928)),
9648       simde_mm_set_epi32(INT32_C( 1404727965), INT32_C( -103259786), INT32_C( -804737565), INT32_C( -188357300)) },
9649     { simde_mm_set_epi32(INT32_C( 1212827068), INT32_C( 1189440629), INT32_C(-1547155816), INT32_C( 1839063433)),
9650       simde_mm_set_epi32(INT32_C(  796540528), INT32_C( -982269468), INT32_C(  -40316418), INT32_C( -430354120)),
9651       simde_mm_set_epi32(INT32_C(  796540528), INT32_C( 1212827068), INT32_C( -982269468), INT32_C( 1189440629)) },
9652     { simde_mm_set_epi32(INT32_C( 1356454008), INT32_C( -215878264), INT32_C(-1695191474), INT32_C(  378220333)),
9653       simde_mm_set_epi32(INT32_C( -864195447), INT32_C(-1443486627), INT32_C(-2133730470), INT32_C(  373467456)),
9654       simde_mm_set_epi32(INT32_C( -864195447), INT32_C( 1356454008), INT32_C(-1443486627), INT32_C( -215878264)) },
9655     { simde_mm_set_epi32(INT32_C(  764442598), INT32_C( 1720554406), INT32_C( 1938751418), INT32_C( 1005471402)),
9656       simde_mm_set_epi32(INT32_C(  883878116), INT32_C(  255422854), INT32_C(  583152961), INT32_C( -594123403)),
9657       simde_mm_set_epi32(INT32_C(  883878116), INT32_C(  764442598), INT32_C(  255422854), INT32_C( 1720554406)) },
9658     { simde_mm_set_epi32(INT32_C( -822423451), INT32_C( -180339328), INT32_C( -689601673), INT32_C(-1524838623)),
9659       simde_mm_set_epi32(INT32_C( -665157473), INT32_C(-2141208691), INT32_C(-1935796365), INT32_C( -482464349)),
9660       simde_mm_set_epi32(INT32_C( -665157473), INT32_C( -822423451), INT32_C(-2141208691), INT32_C( -180339328)) }
9661   };
9662 
9663   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9664     simde__m128i r = simde_mm_unpackhi_epi32(test_vec[i].a, test_vec[i].b);
9665     simde_assert_m128i_i32(r, ==, test_vec[i].r);
9666   }
9667 
9668   return 0;
9669 }
9670 
9671 static int
9672 test_simde_mm_unpackhi_epi64(SIMDE_MUNIT_TEST_ARGS) {
9673   const struct {
9674     simde__m128i a;
9675     simde__m128i b;
9676     simde__m128i r;
9677   } test_vec[8] = {
9678     { simde_mm_set_epi64x(INT64_C(  722804702791611380), INT64_C(-8844158709730520713)),
9679       simde_mm_set_epi64x(INT64_C( 5356529690674667574), INT64_C(  352097273025201980)),
9680       simde_mm_set_epi64x(INT64_C( 5356529690674667574), INT64_C(  722804702791611380)) },
9681     { simde_mm_set_epi64x(INT64_C(-5280201415118755685), INT64_C( -112416304952585661)),
9682       simde_mm_set_epi64x(INT64_C(-6807460961974278490), INT64_C( 7689067396111619214)),
9683       simde_mm_set_epi64x(INT64_C(-6807460961974278490), INT64_C(-5280201415118755685)) },
9684     { simde_mm_set_epi64x(INT64_C(-2786139541058872202), INT64_C(-9128825843223472218)),
9685       simde_mm_set_epi64x(INT64_C(-3160061775455742650), INT64_C(-7280656200013708528)),
9686       simde_mm_set_epi64x(INT64_C(-3160061775455742650), INT64_C(-2786139541058872202)) },
9687     { simde_mm_set_epi64x(INT64_C( -443497399755348660), INT64_C( 1942098937574085101)),
9688       simde_mm_set_epi64x(INT64_C( 6033260672941862371), INT64_C(-4530341492272082568)),
9689       simde_mm_set_epi64x(INT64_C( 6033260672941862371), INT64_C( -443497399755348660)) },
9690     { simde_mm_set_epi64x(INT64_C( 5209052593953008757), INT64_C(-6644983629697130103)),
9691       simde_mm_set_epi64x(INT64_C( 3421115521011270116), INT64_C( -173157692937252552)),
9692       simde_mm_set_epi64x(INT64_C( 3421115521011270116), INT64_C( 5209052593953008757)) },
9693     { simde_mm_set_epi64x(INT64_C( 5825925606967211400), INT64_C(-7280791940909813971)),
9694       simde_mm_set_epi64x(INT64_C(-3711691179365620643), INT64_C(-9164302586755241664)),
9695       simde_mm_set_epi64x(INT64_C(-3711691179365620643), INT64_C( 5825925606967211400)) },
9696     { simde_mm_set_epi64x(INT64_C( 3283255959799829414), INT64_C( 8326873936389097130)),
9697       simde_mm_set_epi64x(INT64_C( 3796227602125517190), INT64_C( 2504622899761407349)),
9698       simde_mm_set_epi64x(INT64_C( 3796227602125517190), INT64_C( 3283255959799829414)) },
9699     { simde_mm_set_epi64x(INT64_C(-3532281821393830528), INT64_C(-2961816630031757535)),
9700       simde_mm_set_epi64x(INT64_C(-2856829591071244403), INT64_C(-8314182075578176093)),
9701       simde_mm_set_epi64x(INT64_C(-2856829591071244403), INT64_C(-3532281821393830528)) }
9702   };
9703 
9704   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9705     simde__m128i r = simde_mm_unpackhi_epi64(test_vec[i].a, test_vec[i].b);
9706     simde_assert_m128i_i64(r, ==, test_vec[i].r);
9707   }
9708 
9709   return 0;
9710 }
9711 
9712 static int
9713 test_simde_mm_unpackhi_pd(SIMDE_MUNIT_TEST_ARGS) {
9714   const struct {
9715     simde__m128d a;
9716     simde__m128d b;
9717     simde__m128d r;
9718   } test_vec[8] = {
9719     { simde_mm_set_pd(SIMDE_FLOAT64_C( -788.38), SIMDE_FLOAT64_C(  -23.22)),
9720       simde_mm_set_pd(SIMDE_FLOAT64_C( -996.21), SIMDE_FLOAT64_C(  645.47)),
9721       simde_mm_set_pd(SIMDE_FLOAT64_C( -996.21), SIMDE_FLOAT64_C( -788.38)) },
9722     { simde_mm_set_pd(SIMDE_FLOAT64_C( -986.13), SIMDE_FLOAT64_C(  267.77)),
9723       simde_mm_set_pd(SIMDE_FLOAT64_C(  401.03), SIMDE_FLOAT64_C(  978.53)),
9724       simde_mm_set_pd(SIMDE_FLOAT64_C(  401.03), SIMDE_FLOAT64_C( -986.13)) },
9725     { simde_mm_set_pd(SIMDE_FLOAT64_C( -468.32), SIMDE_FLOAT64_C( -478.73)),
9726       simde_mm_set_pd(SIMDE_FLOAT64_C( -484.79), SIMDE_FLOAT64_C( -613.68)),
9727       simde_mm_set_pd(SIMDE_FLOAT64_C( -484.79), SIMDE_FLOAT64_C( -468.32)) },
9728     { simde_mm_set_pd(SIMDE_FLOAT64_C(  169.21), SIMDE_FLOAT64_C(  897.06)),
9729       simde_mm_set_pd(SIMDE_FLOAT64_C( -872.63), SIMDE_FLOAT64_C( -172.69)),
9730       simde_mm_set_pd(SIMDE_FLOAT64_C( -872.63), SIMDE_FLOAT64_C(  169.21)) },
9731     { simde_mm_set_pd(SIMDE_FLOAT64_C(  499.02), SIMDE_FLOAT64_C(   28.99)),
9732       simde_mm_set_pd(SIMDE_FLOAT64_C(  532.77), SIMDE_FLOAT64_C( -718.79)),
9733       simde_mm_set_pd(SIMDE_FLOAT64_C(  532.77), SIMDE_FLOAT64_C(  499.02)) },
9734     { simde_mm_set_pd(SIMDE_FLOAT64_C(  208.34), SIMDE_FLOAT64_C(  635.19)),
9735       simde_mm_set_pd(SIMDE_FLOAT64_C( -165.40), SIMDE_FLOAT64_C(  391.08)),
9736       simde_mm_set_pd(SIMDE_FLOAT64_C( -165.40), SIMDE_FLOAT64_C(  208.34)) },
9737     { simde_mm_set_pd(SIMDE_FLOAT64_C( -371.80), SIMDE_FLOAT64_C(  698.49)),
9738       simde_mm_set_pd(SIMDE_FLOAT64_C(  603.26), SIMDE_FLOAT64_C(  962.25)),
9739       simde_mm_set_pd(SIMDE_FLOAT64_C(  603.26), SIMDE_FLOAT64_C( -371.80)) },
9740     { simde_mm_set_pd(SIMDE_FLOAT64_C( -939.32), SIMDE_FLOAT64_C(  149.18)),
9741       simde_mm_set_pd(SIMDE_FLOAT64_C(  349.36), SIMDE_FLOAT64_C(  -60.66)),
9742       simde_mm_set_pd(SIMDE_FLOAT64_C(  349.36), SIMDE_FLOAT64_C( -939.32)) }
9743   };
9744 
9745   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9746     simde__m128d r = simde_mm_unpackhi_pd(test_vec[i].a, test_vec[i].b);
9747     simde_assert_m128d_close(r, test_vec[i].r, 1);
9748   }
9749 
9750   return 0;
9751 }
9752 
9753 static int
9754 test_simde_mm_unpacklo_epi8(SIMDE_MUNIT_TEST_ARGS) {
9755   const struct {
9756     simde__m128i a;
9757     simde__m128i b;
9758     simde__m128i r;
9759   } test_vec[8] = {
9760     { simde_mm_set_epi8(INT8_C( -46), INT8_C(  11), INT8_C( -95), INT8_C(  -6), INT8_C(-108), INT8_C(-107), INT8_C( -24), INT8_C( -94),
9761                         INT8_C( -54), INT8_C(   2), INT8_C( 111), INT8_C(  78), INT8_C(  16), INT8_C( -54), INT8_C( -31), INT8_C( -19)),
9762       simde_mm_set_epi8(INT8_C(  40), INT8_C( -29), INT8_C( -79), INT8_C( -49), INT8_C(  12), INT8_C( -63), INT8_C(  87), INT8_C(  55),
9763                         INT8_C( 121), INT8_C( 100), INT8_C( -21), INT8_C(  -2), INT8_C( -22), INT8_C(  29), INT8_C( 110), INT8_C(-110)),
9764       simde_mm_set_epi8(INT8_C( 121), INT8_C( -54), INT8_C( 100), INT8_C(   2), INT8_C( -21), INT8_C( 111), INT8_C(  -2), INT8_C(  78),
9765                         INT8_C( -22), INT8_C(  16), INT8_C(  29), INT8_C( -54), INT8_C( 110), INT8_C( -31), INT8_C(-110), INT8_C( -19)) },
9766     { simde_mm_set_epi8(INT8_C(  40), INT8_C( -52), INT8_C( -72), INT8_C(   9), INT8_C( -57), INT8_C( -62), INT8_C(-100), INT8_C( 119),
9767                         INT8_C( 120), INT8_C( -83), INT8_C( 102), INT8_C( -39), INT8_C( -78), INT8_C( -92), INT8_C( -76), INT8_C( 121)),
9768       simde_mm_set_epi8(INT8_C(   7), INT8_C( -69), INT8_C(-112), INT8_C(  84), INT8_C(  -8), INT8_C(  23), INT8_C(  71), INT8_C( -37),
9769                         INT8_C( 104), INT8_C(-121), INT8_C( -93), INT8_C(  99), INT8_C(  47), INT8_C(-114), INT8_C( -52), INT8_C( 101)),
9770       simde_mm_set_epi8(INT8_C( 104), INT8_C( 120), INT8_C(-121), INT8_C( -83), INT8_C( -93), INT8_C( 102), INT8_C(  99), INT8_C( -39),
9771                         INT8_C(  47), INT8_C( -78), INT8_C(-114), INT8_C( -92), INT8_C( -52), INT8_C( -76), INT8_C( 101), INT8_C( 121)) },
9772     { simde_mm_set_epi8(INT8_C(  23), INT8_C(  31), INT8_C( -95), INT8_C( -23), INT8_C( -83), INT8_C(  40), INT8_C( -32), INT8_C(  -4),
9773                         INT8_C(  97), INT8_C( 107), INT8_C(-118), INT8_C(  28), INT8_C(  58), INT8_C( -42), INT8_C(   6), INT8_C(  14)),
9774       simde_mm_set_epi8(INT8_C(  87), INT8_C( -63), INT8_C(  17), INT8_C( -66), INT8_C( -73), INT8_C( -52), INT8_C(  21), INT8_C( -51),
9775                         INT8_C(  77), INT8_C( 127), INT8_C(-123), INT8_C(  35), INT8_C( -87), INT8_C(  10), INT8_C(-116), INT8_C( -15)),
9776       simde_mm_set_epi8(INT8_C(  77), INT8_C(  97), INT8_C( 127), INT8_C( 107), INT8_C(-123), INT8_C(-118), INT8_C(  35), INT8_C(  28),
9777                         INT8_C( -87), INT8_C(  58), INT8_C(  10), INT8_C( -42), INT8_C(-116), INT8_C(   6), INT8_C( -15), INT8_C(  14)) },
9778     { simde_mm_set_epi8(INT8_C(  82), INT8_C( -82), INT8_C( 120), INT8_C(-117), INT8_C(  95), INT8_C(  34), INT8_C(  57), INT8_C(-126),
9779                         INT8_C( 125), INT8_C( -41), INT8_C(  26), INT8_C( -67), INT8_C( -28), INT8_C( 110), INT8_C(  56), INT8_C(   8)),
9780       simde_mm_set_epi8(INT8_C(  43), INT8_C(  84), INT8_C( -22), INT8_C( -23), INT8_C(-118), INT8_C( 101), INT8_C( -61), INT8_C(   0),
9781                         INT8_C( 102), INT8_C(  10), INT8_C( -14), INT8_C( -26), INT8_C( -16), INT8_C(  -9), INT8_C(-102), INT8_C(  -6)),
9782       simde_mm_set_epi8(INT8_C( 102), INT8_C( 125), INT8_C(  10), INT8_C( -41), INT8_C( -14), INT8_C(  26), INT8_C( -26), INT8_C( -67),
9783                         INT8_C( -16), INT8_C( -28), INT8_C(  -9), INT8_C( 110), INT8_C(-102), INT8_C(  56), INT8_C(  -6), INT8_C(   8)) },
9784     { simde_mm_set_epi8(INT8_C( -53), INT8_C( -22), INT8_C(  64), INT8_C( -17), INT8_C( -84), INT8_C(-128), INT8_C(-124), INT8_C( -98),
9785                         INT8_C( -10), INT8_C( -24), INT8_C(  47), INT8_C( 109), INT8_C(  15), INT8_C( -93), INT8_C(  -3), INT8_C( -83)),
9786       simde_mm_set_epi8(INT8_C( 102), INT8_C(  24), INT8_C(  10), INT8_C(  77), INT8_C( -47), INT8_C( 121), INT8_C(  -9), INT8_C(  31),
9787                         INT8_C(   5), INT8_C(  32), INT8_C( -40), INT8_C(  72), INT8_C(-114), INT8_C( -28), INT8_C(  76), INT8_C(  98)),
9788       simde_mm_set_epi8(INT8_C(   5), INT8_C( -10), INT8_C(  32), INT8_C( -24), INT8_C( -40), INT8_C(  47), INT8_C(  72), INT8_C( 109),
9789                         INT8_C(-114), INT8_C(  15), INT8_C( -28), INT8_C( -93), INT8_C(  76), INT8_C(  -3), INT8_C(  98), INT8_C( -83)) },
9790     { simde_mm_set_epi8(INT8_C(  42), INT8_C(-126), INT8_C( -81), INT8_C(  -3), INT8_C(  60), INT8_C( -79), INT8_C(  80), INT8_C( -92),
9791                         INT8_C( -48), INT8_C(  40), INT8_C(-125), INT8_C(  24), INT8_C(  38), INT8_C( -84), INT8_C( 120), INT8_C(  92)),
9792       simde_mm_set_epi8(INT8_C(-118), INT8_C(-121), INT8_C(  29), INT8_C(-128), INT8_C(-101), INT8_C(   4), INT8_C( -66), INT8_C(  29),
9793                         INT8_C(  -3), INT8_C(  82), INT8_C(  -7), INT8_C( -87), INT8_C(  76), INT8_C(  52), INT8_C(-124), INT8_C(  86)),
9794       simde_mm_set_epi8(INT8_C(  -3), INT8_C( -48), INT8_C(  82), INT8_C(  40), INT8_C(  -7), INT8_C(-125), INT8_C( -87), INT8_C(  24),
9795                         INT8_C(  76), INT8_C(  38), INT8_C(  52), INT8_C( -84), INT8_C(-124), INT8_C( 120), INT8_C(  86), INT8_C(  92)) },
9796     { simde_mm_set_epi8(INT8_C(-121), INT8_C( 102), INT8_C( -71), INT8_C(-105), INT8_C(-120), INT8_C( 124), INT8_C( -56), INT8_C(  80),
9797                         INT8_C( -23), INT8_C(  26), INT8_C(-103), INT8_C(  31), INT8_C( -30), INT8_C( -86), INT8_C( 103), INT8_C( -93)),
9798       simde_mm_set_epi8(INT8_C(-114), INT8_C(   9), INT8_C(  28), INT8_C( -23), INT8_C( 125), INT8_C(  28), INT8_C( -55), INT8_C( -13),
9799                         INT8_C( -41), INT8_C( 123), INT8_C( -52), INT8_C(  49), INT8_C( -94), INT8_C( -66), INT8_C(  69), INT8_C(  75)),
9800       simde_mm_set_epi8(INT8_C( -41), INT8_C( -23), INT8_C( 123), INT8_C(  26), INT8_C( -52), INT8_C(-103), INT8_C(  49), INT8_C(  31),
9801                         INT8_C( -94), INT8_C( -30), INT8_C( -66), INT8_C( -86), INT8_C(  69), INT8_C( 103), INT8_C(  75), INT8_C( -93)) },
9802     { simde_mm_set_epi8(INT8_C( -30), INT8_C(  56), INT8_C(  -7), INT8_C( -85), INT8_C(  -3), INT8_C( -30), INT8_C(  87), INT8_C( 101),
9803                         INT8_C(-112), INT8_C( -18), INT8_C(   7), INT8_C(  45), INT8_C(  32), INT8_C( 103), INT8_C(  -2), INT8_C( 100)),
9804       simde_mm_set_epi8(INT8_C(  75), INT8_C( -55), INT8_C(   1), INT8_C(  61), INT8_C(-126), INT8_C( -76), INT8_C(  61), INT8_C( -69),
9805                         INT8_C( -86), INT8_C( 110), INT8_C( -52), INT8_C( 110), INT8_C(  96), INT8_C( -55), INT8_C(  76), INT8_C(  15)),
9806       simde_mm_set_epi8(INT8_C( -86), INT8_C(-112), INT8_C( 110), INT8_C( -18), INT8_C( -52), INT8_C(   7), INT8_C( 110), INT8_C(  45),
9807                         INT8_C(  96), INT8_C(  32), INT8_C( -55), INT8_C( 103), INT8_C(  76), INT8_C(  -2), INT8_C(  15), INT8_C( 100)) }
9808   };
9809 
9810   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9811     simde__m128i r = simde_mm_unpacklo_epi8(test_vec[i].a, test_vec[i].b);
9812     simde_assert_m128i_i8(r, ==, test_vec[i].r);
9813   }
9814 
9815   return 0;
9816 }
9817 
9818 static int
9819 test_simde_mm_unpacklo_epi16(SIMDE_MUNIT_TEST_ARGS) {
9820   const struct {
9821     simde__m128i a;
9822     simde__m128i b;
9823     simde__m128i r;
9824   } test_vec[8] = {
9825     { simde_mm_set_epi16(INT16_C(-11765), INT16_C(-24070), INT16_C(-27499), INT16_C( -5982),
9826                          INT16_C(-13822), INT16_C( 28494), INT16_C(  4298), INT16_C( -7699)),
9827       simde_mm_set_epi16(INT16_C( 10467), INT16_C(-20017), INT16_C(  3265), INT16_C( 22327),
9828                          INT16_C( 31076), INT16_C( -5122), INT16_C( -5603), INT16_C( 28306)),
9829       simde_mm_set_epi16(INT16_C( 31076), INT16_C(-13822), INT16_C( -5122), INT16_C( 28494),
9830                          INT16_C( -5603), INT16_C(  4298), INT16_C( 28306), INT16_C( -7699)) },
9831     { simde_mm_set_epi16(INT16_C( 10444), INT16_C(-18423), INT16_C(-14398), INT16_C(-25481),
9832                          INT16_C( 30893), INT16_C( 26329), INT16_C(-19804), INT16_C(-19335)),
9833       simde_mm_set_epi16(INT16_C(  1979), INT16_C(-28588), INT16_C( -2025), INT16_C( 18395),
9834                          INT16_C( 26759), INT16_C(-23709), INT16_C( 12174), INT16_C(-13211)),
9835       simde_mm_set_epi16(INT16_C( 26759), INT16_C( 30893), INT16_C(-23709), INT16_C( 26329),
9836                          INT16_C( 12174), INT16_C(-19804), INT16_C(-13211), INT16_C(-19335)) },
9837     { simde_mm_set_epi16(INT16_C(  5919), INT16_C(-24087), INT16_C(-21208), INT16_C( -7940),
9838                          INT16_C( 24939), INT16_C(-30180), INT16_C( 15062), INT16_C(  1550)),
9839       simde_mm_set_epi16(INT16_C( 22465), INT16_C(  4542), INT16_C(-18484), INT16_C(  5581),
9840                          INT16_C( 19839), INT16_C(-31453), INT16_C(-22262), INT16_C(-29455)),
9841       simde_mm_set_epi16(INT16_C( 19839), INT16_C( 24939), INT16_C(-31453), INT16_C(-30180),
9842                          INT16_C(-22262), INT16_C( 15062), INT16_C(-29455), INT16_C(  1550)) },
9843     { simde_mm_set_epi16(INT16_C( 21166), INT16_C( 30859), INT16_C( 24354), INT16_C( 14722),
9844                          INT16_C( 32215), INT16_C(  6845), INT16_C( -7058), INT16_C( 14344)),
9845       simde_mm_set_epi16(INT16_C( 11092), INT16_C( -5399), INT16_C(-30107), INT16_C(-15616),
9846                          INT16_C( 26122), INT16_C( -3354), INT16_C( -3849), INT16_C(-25862)),
9847       simde_mm_set_epi16(INT16_C( 26122), INT16_C( 32215), INT16_C( -3354), INT16_C(  6845),
9848                          INT16_C( -3849), INT16_C( -7058), INT16_C(-25862), INT16_C( 14344)) },
9849     { simde_mm_set_epi16(INT16_C(-13334), INT16_C( 16623), INT16_C(-21376), INT16_C(-31586),
9850                          INT16_C( -2328), INT16_C( 12141), INT16_C(  4003), INT16_C(  -595)),
9851       simde_mm_set_epi16(INT16_C( 26136), INT16_C(  2637), INT16_C(-11911), INT16_C( -2273),
9852                          INT16_C(  1312), INT16_C(-10168), INT16_C(-28956), INT16_C( 19554)),
9853       simde_mm_set_epi16(INT16_C(  1312), INT16_C( -2328), INT16_C(-10168), INT16_C( 12141),
9854                          INT16_C(-28956), INT16_C(  4003), INT16_C( 19554), INT16_C(  -595)) },
9855     { simde_mm_set_epi16(INT16_C( 10882), INT16_C(-20483), INT16_C( 15537), INT16_C( 20644),
9856                          INT16_C(-12248), INT16_C(-31976), INT16_C(  9900), INT16_C( 30812)),
9857       simde_mm_set_epi16(INT16_C(-30073), INT16_C(  7552), INT16_C(-25852), INT16_C(-16867),
9858                          INT16_C(  -686), INT16_C( -1623), INT16_C( 19508), INT16_C(-31658)),
9859       simde_mm_set_epi16(INT16_C(  -686), INT16_C(-12248), INT16_C( -1623), INT16_C(-31976),
9860                          INT16_C( 19508), INT16_C(  9900), INT16_C(-31658), INT16_C( 30812)) },
9861     { simde_mm_set_epi16(INT16_C(-30874), INT16_C(-18025), INT16_C(-30596), INT16_C(-14256),
9862                          INT16_C( -5862), INT16_C(-26337), INT16_C( -7510), INT16_C( 26531)),
9863       simde_mm_set_epi16(INT16_C(-29175), INT16_C(  7401), INT16_C( 32028), INT16_C(-13837),
9864                          INT16_C(-10373), INT16_C(-13263), INT16_C(-23874), INT16_C( 17739)),
9865       simde_mm_set_epi16(INT16_C(-10373), INT16_C( -5862), INT16_C(-13263), INT16_C(-26337),
9866                          INT16_C(-23874), INT16_C( -7510), INT16_C( 17739), INT16_C( 26531)) },
9867     { simde_mm_set_epi16(INT16_C( -7624), INT16_C( -1621), INT16_C(  -542), INT16_C( 22373),
9868                          INT16_C(-28434), INT16_C(  1837), INT16_C(  8295), INT16_C(  -412)),
9869       simde_mm_set_epi16(INT16_C( 19401), INT16_C(   317), INT16_C(-32076), INT16_C( 15803),
9870                          INT16_C(-21906), INT16_C(-13202), INT16_C( 24777), INT16_C( 19471)),
9871       simde_mm_set_epi16(INT16_C(-21906), INT16_C(-28434), INT16_C(-13202), INT16_C(  1837),
9872                          INT16_C( 24777), INT16_C(  8295), INT16_C( 19471), INT16_C(  -412)) }
9873   };
9874 
9875   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9876     simde__m128i r = simde_mm_unpacklo_epi16(test_vec[i].a, test_vec[i].b);
9877     simde_assert_m128i_i16(r, ==, test_vec[i].r);
9878   }
9879 
9880   return 0;
9881 }
9882 
9883 static int
9884 test_simde_mm_unpacklo_epi32(SIMDE_MUNIT_TEST_ARGS) {
9885   const struct {
9886     simde__m128i a;
9887     simde__m128i b;
9888     simde__m128i r;
9889   } test_vec[8] = {
9890     { simde_mm_set_epi32(INT32_C( -770989574), INT32_C(-1802114910), INT32_C( -905810098), INT32_C(  281731565)),
9891       simde_mm_set_epi32(INT32_C(  686010831), INT32_C(  213997367), INT32_C( 2036657150), INT32_C( -367169902)),
9892       simde_mm_set_epi32(INT32_C( 2036657150), INT32_C( -905810098), INT32_C( -367169902), INT32_C(  281731565)) },
9893     { simde_mm_set_epi32(INT32_C(  684505097), INT32_C( -943547273), INT32_C( 2024629977), INT32_C(-1297828743)),
9894       simde_mm_set_epi32(INT32_C(  129732692), INT32_C( -132692005), INT32_C( 1753719651), INT32_C(  797887589)),
9895       simde_mm_set_epi32(INT32_C( 1753719651), INT32_C( 2024629977), INT32_C(  797887589), INT32_C(-1297828743)) },
9896     { simde_mm_set_epi32(INT32_C(  387949033), INT32_C(-1389829892), INT32_C( 1634437660), INT32_C(  987104782)),
9897       simde_mm_set_epi32(INT32_C( 1472270782), INT32_C(-1211361843), INT32_C( 1300202787), INT32_C(-1458926351)),
9898       simde_mm_set_epi32(INT32_C( 1300202787), INT32_C( 1634437660), INT32_C(-1458926351), INT32_C(  987104782)) },
9899     { simde_mm_set_epi32(INT32_C( 1387165835), INT32_C( 1596078466), INT32_C( 2111249085), INT32_C( -462538744)),
9900       simde_mm_set_epi32(INT32_C(  726985449), INT32_C(-1973042432), INT32_C( 1711993574), INT32_C( -252208390)),
9901       simde_mm_set_epi32(INT32_C( 1711993574), INT32_C( 2111249085), INT32_C( -252208390), INT32_C( -462538744)) },
9902     { simde_mm_set_epi32(INT32_C( -873840401), INT32_C(-1400863586), INT32_C( -152555667), INT32_C(  262405549)),
9903       simde_mm_set_epi32(INT32_C( 1712851533), INT32_C( -780536033), INT32_C(   86038600), INT32_C(-1897640862)),
9904       simde_mm_set_epi32(INT32_C(   86038600), INT32_C( -152555667), INT32_C(-1897640862), INT32_C(  262405549)) },
9905     { simde_mm_set_epi32(INT32_C(  713207805), INT32_C( 1018253476), INT32_C( -802651368), INT32_C(  648837212)),
9906       simde_mm_set_epi32(INT32_C(-1970856576), INT32_C(-1694188003), INT32_C(  -44893783), INT32_C( 1278510166)),
9907       simde_mm_set_epi32(INT32_C(  -44893783), INT32_C( -802651368), INT32_C( 1278510166), INT32_C(  648837212)) },
9908     { simde_mm_set_epi32(INT32_C(-2023310953), INT32_C(-2005088176), INT32_C( -384132833), INT32_C( -492148829)),
9909       simde_mm_set_epi32(INT32_C(-1912005399), INT32_C( 2099038707), INT32_C( -679752655), INT32_C(-1564588725)),
9910       simde_mm_set_epi32(INT32_C( -679752655), INT32_C( -384132833), INT32_C(-1564588725), INT32_C( -492148829)) },
9911     { simde_mm_set_epi32(INT32_C( -499582549), INT32_C(  -35498139), INT32_C(-1863448787), INT32_C(  543686244)),
9912       simde_mm_set_epi32(INT32_C( 1271464253), INT32_C(-2102116933), INT32_C(-1435579282), INT32_C( 1623804943)),
9913       simde_mm_set_epi32(INT32_C(-1435579282), INT32_C(-1863448787), INT32_C( 1623804943), INT32_C(  543686244)) }
9914   };
9915 
9916   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9917     simde__m128i r = simde_mm_unpacklo_epi32(test_vec[i].a, test_vec[i].b);
9918     simde_assert_m128i_i32(r, ==, test_vec[i].r);
9919   }
9920 
9921   return 0;
9922 }
9923 
9924 static int
9925 test_simde_mm_unpacklo_epi64(SIMDE_MUNIT_TEST_ARGS) {
9926   const struct {
9927     simde__m128i a;
9928     simde__m128i b;
9929     simde__m128i r;
9930   } test_vec[8] = {
9931     { simde_mm_set_epi64x(INT64_C(-3311375003394119518), INT64_C(-3890424747014823443)),
9932       simde_mm_set_epi64x(INT64_C( 2946394084060780343), INT64_C( 8747375856342363794)),
9933       simde_mm_set_epi64x(INT64_C( 8747375856342363794), INT64_C(-3890424747014823443)) },
9934     { simde_mm_set_epi64x(INT64_C( 2939927008911727735), INT64_C( 8695719540713370745)),
9935       simde_mm_set_epi64x(INT64_C(  557197673524316123), INT64_C( 7532168548195421285)),
9936       simde_mm_set_epi64x(INT64_C( 7532168548195421285), INT64_C( 8695719540713370745)) },
9937     { simde_mm_set_epi64x(INT64_C( 1666228412154962172), INT64_C( 7019856298037872142)),
9938       simde_mm_set_epi64x(INT64_C( 6323354862629950925), INT64_C( 5584328451169094897)),
9939       simde_mm_set_epi64x(INT64_C( 5584328451169094897), INT64_C( 7019856298037872142)) },
9940     { simde_mm_set_epi64x(INT64_C( 5957831897049610626), INT64_C( 9067745777617352712)),
9941       simde_mm_set_epi64x(INT64_C( 3122378730444800768), INT64_C( 7352956415334914810)),
9942       simde_mm_set_epi64x(INT64_C( 7352956415334914810), INT64_C( 9067745777617352712)) },
9943     { simde_mm_set_epi64x(INT64_C(-3753115941324421986), INT64_C( -655221600322060883)),
9944       simde_mm_set_epi64x(INT64_C( 7356641320652896031), INT64_C(  369532975590952034)),
9945       simde_mm_set_epi64x(INT64_C(  369532975590952034), INT64_C( -655221600322060883)) },
9946     { simde_mm_set_epi64x(INT64_C( 3063204198745198756), INT64_C(-3447361375000823716)),
9947       simde_mm_set_epi64x(INT64_C(-8464764536425759203), INT64_C( -192817328500210602)),
9948       simde_mm_set_epi64x(INT64_C( -192817328500210602), INT64_C(-3447361375000823716)) },
9949     { simde_mm_set_epi64x(INT64_C(-8690054370483713968), INT64_C(-1649837951252011101)),
9950       simde_mm_set_epi64x(INT64_C(-8212000656381392397), INT64_C(-2919515419863792309)),
9951       simde_mm_set_epi64x(INT64_C(-2919515419863792309), INT64_C(-1649837951252011101)) },
9952     { simde_mm_set_epi64x(INT64_C(-2145690705347848347), INT64_C(-8003451597392183708)),
9953       simde_mm_set_epi64x(INT64_C( 5460897386860920251), INT64_C(-6165766065381356529)),
9954       simde_mm_set_epi64x(INT64_C(-6165766065381356529), INT64_C(-8003451597392183708)) }
9955   };
9956 
9957   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9958     simde__m128i r = simde_mm_unpacklo_epi64(test_vec[i].a, test_vec[i].b);
9959     simde_assert_m128i_i64(r, ==, test_vec[i].r);
9960   }
9961 
9962   return 0;
9963 }
9964 
9965 static int
9966 test_simde_mm_unpacklo_pd(SIMDE_MUNIT_TEST_ARGS) {
9967   const struct {
9968     simde__m128d a;
9969     simde__m128d b;
9970     simde__m128d r;
9971   } test_vec[8] = {
9972     { simde_mm_set_pd(SIMDE_FLOAT64_C(  160.82), SIMDE_FLOAT64_C( -868.81)),
9973       simde_mm_set_pd(SIMDE_FLOAT64_C(  640.98), SIMDE_FLOAT64_C(  578.20)),
9974       simde_mm_set_pd(SIMDE_FLOAT64_C(  578.20), SIMDE_FLOAT64_C( -868.81)) },
9975     { simde_mm_set_pd(SIMDE_FLOAT64_C( -900.35), SIMDE_FLOAT64_C(  829.02)),
9976       simde_mm_set_pd(SIMDE_FLOAT64_C( -680.55), SIMDE_FLOAT64_C(  -51.61)),
9977       simde_mm_set_pd(SIMDE_FLOAT64_C(  -51.61), SIMDE_FLOAT64_C(  829.02)) },
9978     { simde_mm_set_pd(SIMDE_FLOAT64_C(  560.63), SIMDE_FLOAT64_C(  395.65)),
9979       simde_mm_set_pd(SIMDE_FLOAT64_C( -681.25), SIMDE_FLOAT64_C(  -57.21)),
9980       simde_mm_set_pd(SIMDE_FLOAT64_C(  -57.21), SIMDE_FLOAT64_C(  395.65)) },
9981     { simde_mm_set_pd(SIMDE_FLOAT64_C(  938.21), SIMDE_FLOAT64_C( -628.45)),
9982       simde_mm_set_pd(SIMDE_FLOAT64_C( -939.59), SIMDE_FLOAT64_C( -183.36)),
9983       simde_mm_set_pd(SIMDE_FLOAT64_C( -183.36), SIMDE_FLOAT64_C( -628.45)) },
9984     { simde_mm_set_pd(SIMDE_FLOAT64_C(  352.81), SIMDE_FLOAT64_C( -540.34)),
9985       simde_mm_set_pd(SIMDE_FLOAT64_C( -819.35), SIMDE_FLOAT64_C( -238.91)),
9986       simde_mm_set_pd(SIMDE_FLOAT64_C( -238.91), SIMDE_FLOAT64_C( -540.34)) },
9987     { simde_mm_set_pd(SIMDE_FLOAT64_C(  435.92), SIMDE_FLOAT64_C(  320.63)),
9988       simde_mm_set_pd(SIMDE_FLOAT64_C( -314.42), SIMDE_FLOAT64_C( -394.55)),
9989       simde_mm_set_pd(SIMDE_FLOAT64_C( -394.55), SIMDE_FLOAT64_C(  320.63)) },
9990     { simde_mm_set_pd(SIMDE_FLOAT64_C( -256.77), SIMDE_FLOAT64_C(  784.61)),
9991       simde_mm_set_pd(SIMDE_FLOAT64_C( -354.05), SIMDE_FLOAT64_C(  -16.87)),
9992       simde_mm_set_pd(SIMDE_FLOAT64_C(  -16.87), SIMDE_FLOAT64_C(  784.61)) },
9993     { simde_mm_set_pd(SIMDE_FLOAT64_C(   81.23), SIMDE_FLOAT64_C(  882.56)),
9994       simde_mm_set_pd(SIMDE_FLOAT64_C( -661.47), SIMDE_FLOAT64_C( -202.79)),
9995       simde_mm_set_pd(SIMDE_FLOAT64_C( -202.79), SIMDE_FLOAT64_C(  882.56)) }
9996   };
9997 
9998   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
9999     simde__m128d r = simde_mm_unpacklo_pd(test_vec[i].a, test_vec[i].b);
10000     simde_assert_m128d_close(r, test_vec[i].r, 1);
10001   }
10002 
10003   return 0;
10004 }
10005 
10006 static int
10007 test_simde_mm_xor_pd(SIMDE_MUNIT_TEST_ARGS) {
10008   simde__m128d
10009     all_set = simde_x_mm_setone_pd(),
10010     all_unset = simde_mm_setzero_pd();
10011 
10012   simde_assert_m128d_equal(simde_mm_xor_pd(all_set, all_unset), all_set);
10013   simde_assert_m128d_equal(simde_mm_xor_pd(all_set, all_set), all_unset);
10014   simde_assert_m128d_equal(simde_mm_xor_pd(all_unset, all_unset), all_unset);
10015 
10016   return 0;
10017 }
10018 
10019 static int
10020 test_simde_mm_xor_si128(SIMDE_MUNIT_TEST_ARGS) {
10021   const struct {
10022     simde__m128i a;
10023     simde__m128i b;
10024     simde__m128i r;
10025   } test_vec[8] = {
10026     { simde_mm_set_epi32(INT32_C( 1623880239), INT32_C( 1318620160), INT32_C(-1283662193), INT32_C(-1453845482)),
10027       simde_mm_set_epi32(INT32_C(-1675083604), INT32_C(  603168286), INT32_C(  409798099), INT32_C(  632966287)),
10028       simde_mm_set_epi32(INT32_C(  -52284797), INT32_C( 1835733534), INT32_C(-1424893092), INT32_C(-1931323239)) },
10029     { simde_mm_set_epi32(INT32_C( 1509092554), INT32_C( 1648495442), INT32_C(-1486316171), INT32_C(  868417203)),
10030       simde_mm_set_epi32(INT32_C( 1183220554), INT32_C(-1650741405), INT32_C(-1277877547), INT32_C( -793058853)),
10031       simde_mm_set_epi32(INT32_C(  527724416), INT32_C(   -2513871), INT32_C(  347979680), INT32_C( -478666904)) },
10032     { simde_mm_set_epi32(INT32_C(  373711788), INT32_C(-1451210820), INT32_C( 1218370771), INT32_C( 1535794325)),
10033       simde_mm_set_epi32(INT32_C( -155546503), INT32_C(-2037105503), INT32_C( 1041195962), INT32_C(-1654529737)),
10034       simde_mm_set_epi32(INT32_C( -520294443), INT32_C(  789871389), INT32_C( 1989263209), INT32_C( -957629022)) },
10035     { simde_mm_set_epi32(INT32_C(-1223418601), INT32_C(  332961755), INT32_C(  688173092), INT32_C(  352304516)),
10036       simde_mm_set_epi32(INT32_C( -734452212), INT32_C( -791801405), INT32_C(  114386244), INT32_C(  996038140)),
10037       simde_mm_set_epi32(INT32_C( 1663908635), INT32_C(-1021934056), INT32_C(  802542944), INT32_C(  799139960)) },
10038     { simde_mm_set_epi32(INT32_C( 1204298996), INT32_C( 1777561493), INT32_C(  531158614), INT32_C(-1345218351)),
10039       simde_mm_set_epi32(INT32_C(  465699923), INT32_C(-1417149028), INT32_C(-1963684061), INT32_C( -837148929)),
10040       simde_mm_set_epi32(INT32_C( 1544167591), INT32_C(-1032099319), INT32_C(-1789109899), INT32_C( 1640728110)) },
10041     { simde_mm_set_epi32(INT32_C( 1401162168), INT32_C( -922039657), INT32_C( 1348044504), INT32_C( 1592606181)),
10042       simde_mm_set_epi32(INT32_C(-1635510345), INT32_C(-1462861610), INT32_C(-1206905626), INT32_C( -326154944)),
10043       simde_mm_set_epi32(INT32_C( -855630321), INT32_C( 1640254017), INT32_C( -397831618), INT32_C(-1302169435)) },
10044     { simde_mm_set_epi32(INT32_C(  882266138), INT32_C( 2140233068), INT32_C( -978476725), INT32_C( -962797184)),
10045       simde_mm_set_epi32(INT32_C( 1476434174), INT32_C(  732384170), INT32_C(  406886944), INT32_C( 1700501859)),
10046       simde_mm_set_epi32(INT32_C( 1821821156), INT32_C( 1412830918), INT32_C( -571655317), INT32_C(-1547208477)) },
10047     { simde_mm_set_epi32(INT32_C(  782585313), INT32_C( 1758933973), INT32_C(-1583302414), INT32_C(-1602193751)),
10048       simde_mm_set_epi32(INT32_C(  760188951), INT32_C(  624290102), INT32_C(  378021852), INT32_C(-1714147587)),
10049       simde_mm_set_epi32(INT32_C(   65723894), INT32_C( 1306712803), INT32_C(-1222074578), INT32_C(  961828948)) }
10050   };
10051 
10052   for (size_t i = 0 ; i < sizeof(test_vec) / sizeof(test_vec[0]) ; i++) {
10053     simde__m128i r = simde_mm_xor_si128(test_vec[i].a, test_vec[i].b);
10054     simde_assert_m128i_i64(r, ==, test_vec[i].r);
10055   }
10056 
10057   return 0;
10058 }
10059 
10060 static int
10061 test_simde_x_mm_not_si128(SIMDE_MUNIT_TEST_ARGS) {
10062   const struct {
10063     simde__m128i a;
10064     simde__m128i r;
10065   } test_vec[8] = {
10066     { simde_mm_set_epi32(INT32_C( -817965525), INT32_C( 2140859656), INT32_C(  142941694), INT32_C(-1061432158)),
10067       simde_mm_set_epi32(INT32_C(  817965524), INT32_C(-2140859657), INT32_C( -142941695), INT32_C( 1061432157)) },
10068     { simde_mm_set_epi32(INT32_C( 1656377120), INT32_C( 1182756765), INT32_C(  499148047), INT32_C( 1939837842)),
10069       simde_mm_set_epi32(INT32_C(-1656377121), INT32_C(-1182756766), INT32_C( -499148048), INT32_C(-1939837843)) },
10070     { simde_mm_set_epi32(INT32_C(-1391390683), INT32_C( -880299242), INT32_C( 1262346433), INT32_C(-1162276292)),
10071       simde_mm_set_epi32(INT32_C( 1391390682), INT32_C(  880299241), INT32_C(-1262346434), INT32_C( 1162276291)) },
10072     { simde_mm_set_epi32(INT32_C(  402553699), INT32_C(-1406117325), INT32_C(-1620159472), INT32_C( 1950201834)),
10073       simde_mm_set_epi32(INT32_C( -402553700), INT32_C( 1406117324), INT32_C( 1620159471), INT32_C(-1950201835)) },
10074     { simde_mm_set_epi32(INT32_C( 1201512664), INT32_C( -722158977), INT32_C(-1427673018), INT32_C(-1348620069)),
10075       simde_mm_set_epi32(INT32_C(-1201512665), INT32_C(  722158976), INT32_C( 1427673017), INT32_C( 1348620068)) },
10076     { simde_mm_set_epi32(INT32_C( 2022239253), INT32_C(  336656978), INT32_C(-2043097029), INT32_C( 2060912582)),
10077       simde_mm_set_epi32(INT32_C(-2022239254), INT32_C( -336656979), INT32_C( 2043097028), INT32_C(-2060912583)) },
10078     { simde_mm_set_epi32(INT32_C(-1767401405), INT32_C(  988173440), INT32_C(  653493949), INT32_C( 1545873213)),
10079       simde_mm_set_epi32(INT32_C( 1767401404), INT32_C( -988173441), INT32_C( -653493950), INT32_C(-1545873214)) },
10080     { simde_mm_set_epi32(INT32_C(  164259681), INT32_C( 1625402133), INT32_C(  274817939), INT32_C( 1382941610)),
10081       simde_mm_set_epi32(INT32_C( -164259682), INT32_C(-1625402134), INT32_C( -274817940), INT32_C(-1382941611)) }
10082   };
10083 
10084   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
10085     simde__m128i r = simde_x_mm_not_si128(test_vec[i].a);
10086     simde_assert_m128i_i32(r, ==, test_vec[i].r);
10087   }
10088 
10089   return 0;
10090 }
10091 
10092 static int
10093 test_simde_x_mm_mul_epi64(SIMDE_MUNIT_TEST_ARGS) {
10094   const struct {
10095     simde__m128i a;
10096     simde__m128i b;
10097     simde__m128i r;
10098   } test_vec[8] = {
10099     { simde_mm_set_epi64x(INT64_C(-6673265146157132667), INT64_C(-8036865195274064518)),
10100       simde_mm_set_epi64x(INT64_C( 4763310881806863840), INT64_C(-2953190602401137090)),
10101       simde_mm_set_epi64x(INT64_C(  804621865193403744), INT64_C( 7037306546512957324)) },
10102     { simde_mm_set_epi64x(INT64_C( 4912321112367014754), INT64_C( 5506077972841640415)),
10103       simde_mm_set_epi64x(INT64_C( 5790159379234202843), INT64_C(-7860297575342104977)),
10104       simde_mm_set_epi64x(INT64_C(-6503632121046397738), INT64_C(-1366099594229104207)) },
10105     { simde_mm_set_epi64x(INT64_C( 2749162021411530208), INT64_C(  408462426494202626)),
10106       simde_mm_set_epi64x(INT64_C( 8447492608754880299), INT64_C(-7046703966410124624)),
10107       simde_mm_set_epi64x(INT64_C(-4973831282761794400), INT64_C( 2599589224149726560)) },
10108     { simde_mm_set_epi64x(INT64_C(  -88834185851708236), INT64_C(-8089393205327952234)),
10109       simde_mm_set_epi64x(INT64_C(  381269932343520540), INT64_C( 2138325983301945876)),
10110       simde_mm_set_epi64x(INT64_C(-7088569628310845520), INT64_C( 1233235991476166584)) },
10111     { simde_mm_set_epi64x(INT64_C( -822706701071313394), INT64_C(-2759012498076821456)),
10112       simde_mm_set_epi64x(INT64_C( 3465917358098376677), INT64_C(-7954598628423398790)),
10113       simde_mm_set_epi64x(INT64_C(-6698232051336684410), INT64_C(-6956668788971772192)) },
10114     { simde_mm_set_epi64x(INT64_C( 8188114688325369058), INT64_C(-5073366312523094897)),
10115       simde_mm_set_epi64x(INT64_C(-8915693716470801407), INT64_C( 9186903668894606147)),
10116       simde_mm_set_epi64x(INT64_C( 3677373050832155874), INT64_C(-2924803137816977811)) },
10117     { simde_mm_set_epi64x(INT64_C(-5966336380315033651), INT64_C( 8263120995643775133)),
10118       simde_mm_set_epi64x(INT64_C(-4262947749795433008), INT64_C(-8185205248719856231)),
10119       simde_mm_set_epi64x(INT64_C(-5369329972927887472), INT64_C(-4868166633591505195)) },
10120     { simde_mm_set_epi64x(INT64_C( 2800078338557512603), INT64_C(-7382248080413965284)),
10121       simde_mm_set_epi64x(INT64_C( -645055313537887494), INT64_C( 2018860835012845242)),
10122       simde_mm_set_epi64x(INT64_C( 3796538949364005726), INT64_C(-1962708987484978088)) }
10123   };
10124 
10125   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
10126     simde__m128i r = simde_x_mm_mul_epi64(test_vec[i].a, test_vec[i].b);
10127     simde_assert_m128i_i64(r, ==, test_vec[i].r);
10128   }
10129 
10130   return 0;
10131 }
10132 
10133 static int
10134 test_simde_x_mm_sub_epu32(SIMDE_MUNIT_TEST_ARGS) {
10135   const struct {
10136     simde__m128i a;
10137     simde__m128i b;
10138     simde__m128i r;
10139   } test_vec[8] = {
10140     { simde_x_mm_set_epu32(UINT32_C( 591915169), UINT32_C(1162556909), UINT32_C(2711661198), UINT32_C( 649386420)),
10141       simde_x_mm_set_epu32(UINT32_C(2963858433), UINT32_C( 387638488), UINT32_C(4276033779), UINT32_C(2372843734)),
10142       simde_x_mm_set_epu32(UINT32_C(1923024032), UINT32_C( 774918421), UINT32_C(2730594715), UINT32_C(2571509982)) },
10143     { simde_x_mm_set_epu32(UINT32_C( 445936307), UINT32_C(1465838226), UINT32_C(3055798709), UINT32_C(2785403539)),
10144       simde_x_mm_set_epu32(UINT32_C(2376292101), UINT32_C(2800453656), UINT32_C(2012288479), UINT32_C(1067663469)),
10145       simde_x_mm_set_epu32(UINT32_C(2364611502), UINT32_C(2960351866), UINT32_C(1043510230), UINT32_C(1717740070)) },
10146     { simde_x_mm_set_epu32(UINT32_C( 766825118), UINT32_C(3689178364), UINT32_C(1309713860), UINT32_C(1635279642)),
10147       simde_x_mm_set_epu32(UINT32_C(1287494965), UINT32_C(3931214929), UINT32_C( 130800549), UINT32_C(1579059128)),
10148       simde_x_mm_set_epu32(UINT32_C(3774297449), UINT32_C(4052930731), UINT32_C(1178913311), UINT32_C(  56220514)) },
10149     { simde_x_mm_set_epu32(UINT32_C(1521150506), UINT32_C( 229274390), UINT32_C(2137370048), UINT32_C(1343959137)),
10150       simde_x_mm_set_epu32(UINT32_C( 919906837), UINT32_C(4230649021), UINT32_C(2105941239), UINT32_C(3460244161)),
10151       simde_x_mm_set_epu32(UINT32_C( 601243669), UINT32_C( 293592665), UINT32_C(  31428809), UINT32_C(2178682272)) },
10152     { simde_x_mm_set_epu32(UINT32_C(1275529272), UINT32_C(2231818861), UINT32_C(2063802469), UINT32_C(3732401863)),
10153       simde_x_mm_set_epu32(UINT32_C(2896374047), UINT32_C(1493829257), UINT32_C(2939390855), UINT32_C(1941911553)),
10154       simde_x_mm_set_epu32(UINT32_C(2674122521), UINT32_C( 737989604), UINT32_C(3419378910), UINT32_C(1790490310)) },
10155     { simde_x_mm_set_epu32(UINT32_C(3017205359), UINT32_C(2429422013), UINT32_C(3351841835), UINT32_C(2341203472)),
10156       simde_x_mm_set_epu32(UINT32_C(3000898366), UINT32_C(1136654732), UINT32_C(2535059098), UINT32_C(  90134778)),
10157       simde_x_mm_set_epu32(UINT32_C(  16306993), UINT32_C(1292767281), UINT32_C( 816782737), UINT32_C(2251068694)) },
10158     { simde_x_mm_set_epu32(UINT32_C(  71842021), UINT32_C(1910901245), UINT32_C( 252676465), UINT32_C(3861146107)),
10159       simde_x_mm_set_epu32(UINT32_C(4061170475), UINT32_C(3890236125), UINT32_C(1645686841), UINT32_C(3708385897)),
10160       simde_x_mm_set_epu32(UINT32_C( 305638842), UINT32_C(2315632416), UINT32_C(2901956920), UINT32_C( 152760210)) },
10161     { simde_x_mm_set_epu32(UINT32_C(1390785465), UINT32_C( 237201350), UINT32_C(3330556421), UINT32_C( 382557765)),
10162       simde_x_mm_set_epu32(UINT32_C( 919261037), UINT32_C(4138415457), UINT32_C( 812238579), UINT32_C( 103076353)),
10163       simde_x_mm_set_epu32(UINT32_C( 471524428), UINT32_C( 393753189), UINT32_C(2518317842), UINT32_C( 279481412)) }
10164   };
10165 
10166   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
10167     simde__m128i r = simde_x_mm_sub_epu32(test_vec[i].a, test_vec[i].b);
10168     simde_assert_m128i_u32(r, ==, test_vec[i].r);
10169   }
10170 
10171   return 0;
10172 }
10173 
10174 static int
10175 test_simde_x_mm_mod_epi64(SIMDE_MUNIT_TEST_ARGS) {
10176   const struct {
10177     simde__m128i a;
10178     simde__m128i b;
10179     simde__m128i r;
10180   } test_vec[8] = {
10181     { simde_mm_set_epi64x(INT64_C(-8053187774363015445), INT64_C( 9050551738356525681)),
10182       simde_mm_set_epi64x(INT64_C(-5432362900125533563), INT64_C( 4656333831414330662)),
10183       simde_mm_set_epi64x(INT64_C(-2620824874237481882), INT64_C( 4394217906942195019)) },
10184     { simde_mm_set_epi64x(INT64_C( 3643434954039553447), INT64_C(-6234539097175065740)),
10185       simde_mm_set_epi64x(INT64_C(-1834126128625936904), INT64_C( 5974848154734978575)),
10186       simde_mm_set_epi64x(INT64_C( 1809308825413616543), INT64_C( -259690942440087165)) },
10187     { simde_mm_set_epi64x(INT64_C( 9161306297850640165), INT64_C(-8306180370740150176)),
10188       simde_mm_set_epi64x(INT64_C( 2055562205091916701), INT64_C(-6680168448646461201)),
10189       simde_mm_set_epi64x(INT64_C(  939057477482973361), INT64_C(-1626011922093688975)) },
10190     { simde_mm_set_epi64x(INT64_C(-8267679289606370918), INT64_C(-5928191487249150522)),
10191       simde_mm_set_epi64x(INT64_C( -367540592738432621), INT64_C(-9016984680455221058)),
10192       simde_mm_set_epi64x(INT64_C( -181786249360853256), INT64_C(-5928191487249150522)) },
10193     { simde_mm_set_epi64x(INT64_C(-5911217161035399691), INT64_C(-1038656028139092449)),
10194       simde_mm_set_epi64x(INT64_C(  170272479168034452), INT64_C( 6507756447489319344)),
10195       simde_mm_set_epi64x(INT64_C( -121952869322228323), INT64_C(-1038656028139092449)) },
10196     { simde_mm_set_epi64x(INT64_C(-8644627274378588029), INT64_C( 5613017538463476646)),
10197       simde_mm_set_epi64x(INT64_C(-8247421513208151154), INT64_C(-1150990985458942599)),
10198       simde_mm_set_epi64x(INT64_C( -397205761170436875), INT64_C( 1009053596627706250)) },
10199     { simde_mm_set_epi64x(INT64_C( 8688276933216716257), INT64_C( -409477294924409172)),
10200       simde_mm_set_epi64x(INT64_C( 7651480072460119172), INT64_C( 5980691967331237074)),
10201       simde_mm_set_epi64x(INT64_C( 1036796860756597085), INT64_C( -409477294924409172)) },
10202     { simde_mm_set_epi64x(INT64_C(-6308927419868714376), INT64_C( 6327163388033237975)),
10203       simde_mm_set_epi64x(INT64_C( 4310605020200368092), INT64_C( 1934689183910316990)),
10204       simde_mm_set_epi64x(INT64_C(-1998322399668346284), INT64_C(  523095836302287005)) }
10205   };
10206 
10207   for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) {
10208     simde__m128i r = simde_x_mm_mod_epi64(test_vec[i].a, test_vec[i].b);
10209     simde_assert_m128i_i64(r, ==, test_vec[i].r);
10210   }
10211 
10212   return 0;
10213 }
10214 
/* Registry of the test cases in this file.
 *
 * Each entry names a test by the suffix of its function: the test functions
 * in this file are called test_simde_<name>, so the entry mm_xor_pd
 * corresponds to test_simde_mm_xor_pd.  Entries starting with x_ refer to the
 * simde_x_* functions.
 *
 * NOTE(review): the SIMDE_TEST_FUNC_LIST_* macros are defined outside this
 * file -- confirm there how an entry is expanded and whether the order of
 * entries is the order in which tests run before reordering anything. */
SIMDE_TEST_FUNC_LIST_BEGIN
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_pd1)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_epi64x)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_set1_epi64x)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_setr_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_setzero_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_setzero_si128)

  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_abs_pd)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_add_si64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_adds_epu16)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_and_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_and_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_andnot_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_andnot_si128)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_avg_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_avg_epu16)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_bslli_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_bsrli_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_slli_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_slli_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_slli_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_srli_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_srli_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_srli_epi64)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sra_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sra_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_srai_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_srai_epi32)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_pd1)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_store_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeh_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_storel_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_storel_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_storer_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_si16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_si32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_storeu_si64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_store1_pd)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_si32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_si64)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sub_si64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_subs_epu16)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_min_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_epu8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_max_sd)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_epu32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_mul_su32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_mulhi_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_mulhi_epu16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_mullo_epi16)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpeq_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpneq_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpneq_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmplt_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnlt_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnlt_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmple_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmple_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnle_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnle_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpgt_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpngt_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpngt_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpge_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpge_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnge_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpnge_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpord_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpord_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpunord_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cmpunord_sd)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_castpd_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_castps_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_castsi128_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_castsi128_ps)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comieq_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comige_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comigt_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comile_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comilt_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_comineq_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomieq_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomige_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomigt_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomile_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomilt_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_ucomineq_sd)

  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_copysign_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_xorsign_pd)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepi32_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtepi32_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpd_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpd_pi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpd_ps)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtpi32_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtps_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtps_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsd_f64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsd_si32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsd_si64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsd_ss)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_cvtsi128_si16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi128_si32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi128_si64)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_cvtsi16_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi32_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi32_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi64_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtsi64_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvtss_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttpd_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttpd_pi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttps_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttsd_si32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_cvttsd_si64)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_div_sd)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_extract_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_insert_epi16)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_pd1)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_load_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadh_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadl_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadl_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadr_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_si16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_si32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_loadu_si64)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_movemask_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_movemask_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_maskmoveu_si128)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_move_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_move_sd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_movepi64_pi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_movpi64_epi64)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_or_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_or_si128)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_packs_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_packs_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_packus_epi16)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_undefined_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_undefined_si128)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpackhi_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_epi8)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_unpacklo_pd)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_shuffle_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_shuffle_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_shufflehi_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_shufflelo_epi16)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sll_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sll_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sll_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_srl_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_srl_epi32)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_srl_epi64)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sqrt_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sqrt_sd)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_madd_epi16)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_sad_epu8)

  SIMDE_TEST_FUNC_LIST_ENTRY(mm_xor_pd)
  SIMDE_TEST_FUNC_LIST_ENTRY(mm_xor_si128)

  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_not_si128)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_sub_epu32)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_mul_epi64)
  SIMDE_TEST_FUNC_LIST_ENTRY(x_mm_mod_epi64)
SIMDE_TEST_FUNC_LIST_END
10481 
10482 #include <test/x86/test-x86-footer.h>
10483