; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s

; Instruction-selection test for MSA vector floating-point arithmetic.
;
; Each function loads its <4 x float> / <2 x double> (or integer vector)
; operands through pointer arguments, applies a single IR operation or
; intrinsic, and stores the result through the first pointer argument.
; The FileCheck directives interleaved with the IR expect:
;   * the destination pointer in $4 and the source pointers in $5, $6, $7,
;     in declaration order;
;   * ld.w/st.w for 32-bit element vectors and ld.d/st.d for 64-bit ones;
;   * exactly the named MSA instruction, with its operands tied to the
;     registers captured from the loads.
; The DAG-ordered directives tolerate any instruction schedule that respects
; the captured register dependencies. Every function is bracketed by a match
; on its label and on its .size directive so matches cannot leak into a
; neighbouring function.

; fadd on <4 x float> is expected to select fadd.w.
define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: add_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <4 x float> %1, %2
  ; CHECK-DAG: fadd.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v4f32
}

; fadd on <2 x double> is expected to select fadd.d.
define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: add_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <2 x double> %1, %2
  ; CHECK-DAG: fadd.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v2f64
}

; fsub on <4 x float> is expected to select fsub.w.
define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: sub_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <4 x float> %1, %2
  ; CHECK-DAG: fsub.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v4f32
}

; fsub on <2 x double> is expected to select fsub.d.
define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: sub_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <2 x double> %1, %2
  ; CHECK-DAG: fsub.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v2f64
}

; fmul on <4 x float> is expected to select fmul.w.
define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: mul_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <4 x float> %1, %2
  ; CHECK-DAG: fmul.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v4f32
}

; fmul on <2 x double> is expected to select fmul.d.
define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: mul_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <2 x double> %1, %2
  ; CHECK-DAG: fmul.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v2f64
}

; llvm.fma on <4 x float> is expected to select fmadd.w, which accumulates
; into its first operand; the store therefore reuses the register loaded
; from %a.
; NOTE(review): the MSA manual describes fmadd as wd = wd + ws * wt, whereas
; llvm.fma(a, b, c) is a * b + c. The operand order matched below reads as
; a + b * c - confirm the lowering against the MSA specification.
define void @fma_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                       <4 x float>* %c) nounwind {
  ; CHECK: fma_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <4 x float> @llvm.fma.v4f32 (<4 x float> %1, <4 x float> %2,
                                              <4 x float> %3)
  ; CHECK-DAG: fmadd.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %4, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v4f32
}

; llvm.fma on <2 x double> is expected to select fmadd.d, accumulating into
; the register loaded from %a.
; NOTE(review): same operand-order question as fma_v4f32 - confirm against
; the MSA specification.
define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                       <2 x double>* %c) nounwind {
  ; CHECK: fma_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <2 x double> @llvm.fma.v2f64 (<2 x double> %1, <2 x double> %2,
                                               <2 x double> %3)
  ; CHECK-DAG: fmadd.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %4, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v2f64
}

; A separate fmul feeding the right-hand side of an fsub (a - b * c) is
; expected to be combined into a single fmsub.w that updates the register
; loaded from %a in place.
define void @fmsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                         <4 x float>* %c) nounwind {
  ; CHECK: fmsub_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = fmul <4 x float> %2, %3
  %5 = fsub <4 x float> %1, %4
  ; CHECK-DAG: fmsub.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %5, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v4f32
}

; Same a - b * c combination for <2 x double>, expected to select fmsub.d.
define void @fmsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                         <2 x double>* %c) nounwind {
  ; CHECK: fmsub_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = fmul <2 x double> %2, %3
  %5 = fsub <2 x double> %1, %4
  ; CHECK-DAG: fmsub.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %5, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v2f64
}

; fdiv on <4 x float> is expected to select fdiv.w.
define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: fdiv_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <4 x float> %1, %2
  ; CHECK-DAG: fdiv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v4f32
}

; fdiv on <2 x double> is expected to select fdiv.d.
define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: fdiv_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <2 x double> %1, %2
  ; CHECK-DAG: fdiv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v2f64
}

; llvm.fabs on <4 x float> is expected to select fmax_a.w with the loaded
; value supplied as both source operands.
; NOTE(review): fmax_a is documented as selecting the operand with the larger
; magnitude; confirm that fmax_a.w x, x really produces |x| for negative x.
define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fabs_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.fabs.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fmax_a.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v4f32
}

; llvm.fabs on <2 x double> is expected to select fmax_a.d with the loaded
; value supplied as both source operands.
; NOTE(review): same fmax_a semantics question as fabs_v4f32 - confirm.
define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fabs_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.fabs.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fmax_a.d [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v2f64
}

; llvm.exp2 on <4 x float> is expected to become fexp2.w with a splat of 1.0
; as its first source operand. The splat is built by loading integer 1
; (ldi.w) and converting it to floating point (ffint_u.w).
; Each instruction gets its own capture (R3 -> R4 -> R5) so the expectations
; follow the real data flow: fexp2.w must consume the ffint_u.w result, and
; the match no longer depends on ffint_u.w converting in place.
define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32
}

; llvm.exp2 on <2 x double>: same shape as fexp2_v4f32, using ldi.d,
; ffint_u.d and fexp2.d, with one capture per instruction.
define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.d [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64
}

; A multiply of the llvm.exp2 result by a splat of 2.0 is expected to fold
; into fexp2.w's first source operand instead of emitting a separate fmul.
; The splat is materialised with lui 16384 (0x4000 in the upper half-word,
; i.e. 0x40000000, the single-precision bit pattern of 2.0) followed by
; fill.w.
define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32_2:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  %3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2
  ; CHECK-DAG: lui [[R3:\$[0-9]+]], 16384
  ; CHECK-DAG: fill.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32_2
}

; Double-precision variant of fexp2_v4f32_2. The 2.0 splat is expected to be
; emitted as two .8byte constants ahead of the function label
; (4611686018427387904 = 0x4000000000000000, the double-precision bit pattern
; of 2.0) and loaded with ld.d through a %lo() relocation.
define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: .8byte 4611686018427387904
  ; CHECK-NEXT: .8byte 4611686018427387904
  ; CHECK: fexp2_v2f64_2:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  %3 = fmul <2 x double> <double 2.0, double 2.0>, %2
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo(
  ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64_2
}

; llvm.sqrt on <4 x float> is expected to select fsqrt.w.
define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fsqrt_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fsqrt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v4f32
}

; llvm.sqrt on <2 x double> is expected to select fsqrt.d.
define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fsqrt_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fsqrt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v2f64
}

; uitofp <4 x i32> -> <4 x float> is expected to select ffint_u.w.
define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_u_v4f32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v4f32
}

; uitofp <2 x i64> -> <2 x double> is expected to select ffint_u.d.
define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_u_v2f64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v2f64
}

; sitofp <4 x i32> -> <4 x float> is expected to select ffint_s.w.
define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_s_v4f32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v4f32
}

; sitofp <2 x i64> -> <2 x double> is expected to select ffint_s.d.
define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_s_v2f64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v2f64
}

; fptoui <4 x float> -> <4 x i32> is expected to select ftrunc_u.w.
define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_u_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v4f32
}

; fptoui <2 x double> -> <2 x i64> is expected to select ftrunc_u.d.
define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_u_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v2f64
}

; fptosi <4 x float> -> <4 x i32> is expected to select ftrunc_s.w.
define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_s_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v4f32
}

; fptosi <2 x double> -> <2 x i64> is expected to select ftrunc_s.d.
define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_s_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v2f64
}

; Vector intrinsics called by the tests above.
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %Val)
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %Val)
declare <4 x float> @llvm.exp2.v4f32(<4 x float> %val)
declare <2 x double> @llvm.exp2.v2f64(<2 x double> %val)
declare <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b,
                                    <4 x float> %c)
declare <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b,
                                     <2 x double> %c)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %Val)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %Val)