; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; Each pair of tests should be logically equivalent,
; so codegen should be the same and optimal for each pair.

; Broadcast lane 0 of %a to all lanes, then multiply every lane by 3.0.
define <4 x float> @splat0_before_fmul_constant(<4 x float> %a) {
; CHECK-LABEL: splat0_before_fmul_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov v1.4s, #3.00000000
; CHECK-NEXT:    fmul v0.4s, v1.4s, v0.s[0]
; CHECK-NEXT:    ret
  %lane0 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
  %scaled = fmul <4 x float> %lane0, <float 3.0, float 3.0, float 3.0, float 3.0>
  ret <4 x float> %scaled
}

; Multiply first, then broadcast lane 0 of the product. The shuffle mask only
; selects lane 0, so the 42.0 in lane 1 of the constant never reaches the result.
define <4 x float> @splat0_after_fmul_constant(<4 x float> %a) {
; CHECK-LABEL: splat0_after_fmul_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov v1.4s, #3.00000000
; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    ret
  %scaled = fmul <4 x float> %a, <float 3.0, float 42.0, float 3.0, float 3.0>
  %lane0 = shufflevector <4 x float> %scaled, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %lane0
}

; Try different type and splat lane.

; Broadcast lane 1 of %a, then multiply both lanes by 5.0.
define <2 x double> @splat1_before_fmul_constant(<2 x double> %a) {
; CHECK-LABEL: splat1_before_fmul_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov v1.2d, #5.00000000
; CHECK-NEXT:    fmul v0.2d, v1.2d, v0.d[1]
; CHECK-NEXT:    ret
  %lane1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %scaled = fmul <2 x double> %lane1, <double 5.0, double 5.0>
  ret <2 x double> %scaled
}

; Multiply first, then broadcast lane 1 of the product. Only lane 1 of the
; constant (5.0) is selected by the mask; the -1.0 in lane 0 is unused.
define <2 x double> @splat1_after_fmul_constant(<2 x double> %a) {
; CHECK-LABEL: splat1_after_fmul_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov v1.2d, #5.00000000
; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    dup v0.2d, v0.d[1]
; CHECK-NEXT:    ret
  %scaled = fmul <2 x double> %a, <double -1.0, double 5.0>
  %lane1 = shufflevector <2 x double> %scaled, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %lane1
}

; 2 variable operands

; Broadcast lane 1 of each operand, then multiply the two splats.
define <2 x double> @splat1_before_fmul(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: splat1_before_fmul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    dup v0.2d, v0.d[1]
; CHECK-NEXT:    ret
  %lhs.lane1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %rhs.lane1 = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %prod = fmul <2 x double> %lhs.lane1, %rhs.lane1
  ret <2 x double> %prod
}

; Multiply the full vectors, then broadcast lane 1 of the product.
define <2 x double> @splat1_after_fmul(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: splat1_after_fmul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    dup v0.2d, v0.d[1]
; CHECK-NEXT:    ret
  %prod = fmul <2 x double> %a, %b
  %lane1 = shufflevector <2 x double> %prod, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %lane1
}

; Integer multiply

; Shuffle that selects lane 2 (mask elements 1 and 3 are undef), then multiply
; every lane by 3.
define <4 x i32> @splat2_before_mul_constant(<4 x i32> %a) {
; CHECK-LABEL: splat2_before_mul_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #3
; CHECK-NEXT:    mul v0.4s, v1.4s, v0.s[2]
; CHECK-NEXT:    ret
  %lane2 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
  %scaled = mul <4 x i32> %lane2, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %scaled
}

; Multiply every lane by 3 first, then apply the same lane-2 shuffle (mask
; elements 1 and 3 are undef) to the product.
define <4 x i32> @splat2_after_mul_constant(<4 x i32> %a) {
; CHECK-LABEL: splat2_after_mul_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #3
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    dup v0.4s, v0.s[2]
; CHECK-NEXT:    ret
  %scaled = mul <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  %lane2 = shufflevector <4 x i32> %scaled, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
  ret <4 x i32> %lane2
}

; Different type, lane, and 2 variable operands.

; Broadcast lane 1 of each i16 operand, then multiply the two splats.
define <8 x i16> @splat1_before_mul(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: splat1_before_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    dup v0.8h, v0.h[1]
; CHECK-NEXT:    ret
  %lhs.lane1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %rhs.lane1 = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %prod = mul <8 x i16> %lhs.lane1, %rhs.lane1
  ret <8 x i16> %prod
}

; Multiply the full i16 vectors, then broadcast lane 1 of the product.
define <8 x i16> @splat1_after_mul(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: splat1_after_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    dup v0.8h, v0.h[1]
; CHECK-NEXT:    ret
  %prod = mul <8 x i16> %a, %b
  %lane1 = shufflevector <8 x i16> %prod, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %lane1
}

; Multiple multiplies.

; Two splat-then-multiply stages chained: broadcast lane 0 and scale by 3.0,
; then broadcast lane 0 of that result and scale by 6.0.
define <4 x float> @splat0_before_fmul_fmul_constant(<4 x float> %a) {
; CHECK-LABEL: splat0_before_fmul_fmul_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov v1.4s, #3.00000000
; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    fmov v1.4s, #6.00000000
; CHECK-NEXT:    fmul v0.4s, v1.4s, v0.s[0]
; CHECK-NEXT:    ret
  %lane0.in = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
  %times3 = fmul <4 x float> %lane0.in, <float 3.0, float 3.0, float 3.0, float 3.0>
  %lane0.mid = shufflevector <4 x float> %times3, <4 x float> undef, <4 x i32> zeroinitializer
  %times6 = fmul <4 x float> %lane0.mid, <float 6.0, float 6.0, float 6.0, float 6.0>
  ret <4 x float> %times6
}

; Both multiplies happen on the full vector, followed by a single broadcast of
; lane 0. Only lane 0 of each constant (3.0 and 6.0) is selected by the final
; shuffle mask; the remaining constant lanes are unused.
define <4 x float> @splat0_after_fmul_fmul_constant(<4 x float> %a) {
; CHECK-LABEL: splat0_after_fmul_fmul_constant:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov v1.4s, #3.00000000
; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    fmov v1.4s, #6.00000000
; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    ret
  %times3 = fmul <4 x float> %a, <float 3.0, float 42.0, float 3.0, float 3.0>
  %times6 = fmul <4 x float> %times3, <float 6.0, float 42.0, float 3.0, float 3.0>
  %lane0 = shufflevector <4 x float> %times6, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %lane0
}