1 // REQUIRES: aarch64-registered-target 2 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ 3 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s 4 5 // Test new aarch64 intrinsics with poly64 6 7 #include <arm_neon.h> 8 9 uint64x1_t test_vceq_p64(poly64x1_t a, poly64x1_t b) { 10 // CHECK: test_vceq_p64 11 return vceq_p64(a, b); 12 // CHECK: cmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 13 } 14 15 uint64x2_t test_vceqq_p64(poly64x2_t a, poly64x2_t b) { 16 // CHECK: test_vceqq_p64 17 return vceqq_p64(a, b); 18 // CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 19 } 20 21 uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) { 22 // CHECK: test_vtst_p64 23 return vtst_p64(a, b); 24 // CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 25 } 26 27 uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) { 28 // CHECK: test_vtstq_p64 29 return vtstq_p64(a, b); 30 // CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d 31 } 32 33 poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) { 34 // CHECK: test_vbsl_p64 35 return vbsl_p64(a, b, c); 36 // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b 37 } 38 39 poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) { 40 // CHECK: test_vbslq_p64 41 return vbslq_p64(a, b, c); 42 // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b 43 } 44 45 poly64_t test_vget_lane_p64(poly64x1_t v) { 46 // CHECK: test_vget_lane_p64 47 return vget_lane_p64(v, 0); 48 // CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} 49 } 50 51 poly64_t test_vgetq_lane_p64(poly64x2_t v) { 52 // CHECK: test_vgetq_lane_p64 53 return vgetq_lane_p64(v, 1); 54 // CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1] 55 } 56 57 poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) { 58 // CHECK: test_vset_lane_p64 59 return vset_lane_p64(a, v, 0); 60 // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} 61 } 62 63 poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) { 64 // CHECK: test_vsetq_lane_p64 65 return vsetq_lane_p64(a, v, 1); 66 // CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}} 67 } 68 69 poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) { 70 // CHECK: test_vcopy_lane_p64 71 return vcopy_lane_p64(a, 0, b, 0); 72 // CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}} 73 } 74 75 poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) { 76 // CHECK: test_vcopyq_lane_p64 77 return vcopyq_lane_p64(a, 1, b, 0); 78 // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 79 } 80 81 poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) { 82 // CHECK: test_vcopyq_laneq_p64 83 return vcopyq_laneq_p64(a, 1, b, 1); 84 // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1] 85 } 86 87 poly64x1_t test_vcreate_p64(uint64_t a) { 88 // CHECK: test_vcreate_p64 89 return vcreate_p64(a); 90 // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} 91 } 92 93 poly64x1_t test_vdup_n_p64(poly64_t a) { 94 // CHECK: test_vdup_n_p64 95 return vdup_n_p64(a); 96 // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} 97 } 98 poly64x2_t test_vdupq_n_p64(poly64_t a) { 99 // CHECK: test_vdup_n_p64 100 return vdupq_n_p64(a); 101 // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} 102 } 103 104 poly64x1_t test_vdup_lane_p64(poly64x1_t vec) { 105 // CHECK: test_vdup_lane_p64 106 return vdup_lane_p64(vec, 0); 107 // CHECK: ret 108 } 109 110 poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) { 111 // CHECK: test_vdupq_lane_p64 112 return vdupq_lane_p64(vec, 0); 113 // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 114 } 115 116 poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) { 117 // CHECK: test_vdupq_laneq_p64 118 return vdupq_laneq_p64(vec, 1); 119 // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 120 } 121 122 poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) { 123 // CHECK: test_vcombine_p64 124 return vcombine_p64(low, high); 125 // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 126 } 127 128 poly64x1_t test_vld1_p64(poly64_t const * ptr) { 129 // CHECK: test_vld1_p64 130 return vld1_p64(ptr); 131 // CHECK: ld1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] 132 } 133 134 poly64x2_t test_vld1q_p64(poly64_t const * ptr) { 135 // CHECK: test_vld1q_p64 136 return vld1q_p64(ptr); 137 // CHECK: ld1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] 138 } 139 140 void test_vst1_p64(poly64_t * ptr, poly64x1_t val) { 141 // CHECK: test_vst1_p64 142 return vst1_p64(ptr, val); 143 // CHECK: st1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] 144 } 145 146 void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) { 147 // CHECK: test_vst1q_p64 148 return vst1q_p64(ptr, val); 149 // CHECK: st1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] 150 } 151 152 poly64x1x2_t test_vld2_p64(poly64_t const * ptr) { 153 // CHECK: test_vld2_p64 154 return vld2_p64(ptr); 155 // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] 156 } 157 158 poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) { 159 // CHECK: test_vld2q_p64 160 return vld2q_p64(ptr); 161 // CHECK: ld2 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] 162 } 163 164 poly64x1x3_t test_vld3_p64(poly64_t const * ptr) { 165 // CHECK: test_vld3_p64 166 return vld3_p64(ptr); 167 // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] 168 } 169 170 poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) { 171 // CHECK: test_vld3q_p64 172 return vld3q_p64(ptr); 173 // CHECK: ld3 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] 174 } 175 176 poly64x1x4_t test_vld4_p64(poly64_t const * ptr) { 177 // CHECK: test_vld4_p64 178 return vld4_p64(ptr); 179 // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] 180 } 181 182 poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) { 183 // CHECK: test_vld4q_p64 184 return vld4q_p64(ptr); 185 // CHECK: ld4 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] 186 } 187 188 void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) { 189 // CHECK: test_vst2_p64 190 return vst2_p64(ptr, val); 191 // CHECK: st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] 192 } 193 194 void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) { 195 // CHECK: test_vst2q_p64 196 return vst2q_p64(ptr, val); 197 // CHECK: st2 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] 198 } 199 200 void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) { 201 // CHECK: test_vst3_p64 202 return vst3_p64(ptr, val); 203 // CHECK: st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] 204 } 205 206 void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) { 207 // CHECK: test_vst3q_p64 208 return vst3q_p64(ptr, val); 209 // CHECK: st3 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] 210 } 211 212 void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) { 213 // CHECK: test_vst4_p64 214 return vst4_p64(ptr, val); 215 // CHECK: st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}] 216 } 217 218 void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) { 219 // CHECK: test_vst4q_p64 220 return vst4q_p64(ptr, val); 221 // CHECK: st4 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}] 222 } 223 224 poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) { 225 // CHECK: test_vext_p64 226 return vext_u64(a, b, 0); 227 228 } 229 230 poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) { 231 // CHECK: test_vextq_p64 232 return vextq_p64(a, b, 1); 233 // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8 234 } 235 236 poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) { 237 // CHECK: test_vzip1q_p64 238 return vzip1q_p64(a, b); 239 // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 240 } 241 242 poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) { 243 // CHECK: test_vzip2q_p64 244 return vzip2q_u64(a, b); 245 // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] 246 } 247 248 poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) { 249 // CHECK: test_vuzp1q_p64 250 return vuzp1q_p64(a, b); 251 // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 252 } 253 254 poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) { 255 // CHECK: test_vuzp2q_p64 256 return vuzp2q_u64(a, b); 257 // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] 258 } 259 260 poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) { 261 // CHECK: test_vtrn1q_p64 262 return vtrn1q_p64(a, b); 263 // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] 264 } 265 266 poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) { 267 // CHECK: test_vtrn2q_p64 268 return vtrn2q_u64(a, b); 269 // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] 270 } 271 272 poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) { 273 // CHECK: test_vsri_n_p64 274 return vsri_n_p64(a, b, 33); 275 // CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #33 276 } 277 278 poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) { 279 // CHECK: test_vsriq_n_p64 280 return vsriq_n_p64(a, b, 64); 281 // CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #64 282 } 283 284