; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s

; Odd divisor
; Lane-wise (X urem 25) == 0, with the i1 result widened to i32.
define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_25:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #23593
; CHECK-NEXT:    movk w8, #49807, lsl #16
; CHECK-NEXT:    mov w9, #28835
; CHECK-NEXT:    movk w9, #2621, lsl #16
; CHECK-NEXT:    dup v1.4s, w8
; CHECK-NEXT:    dup v2.4s, w9
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %rem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
  %is_zero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %is_zero to <4 x i32>
  ret <4 x i32> %res
}

; Even divisors
; Lane-wise (X urem 100) == 0, with the i1 result widened to i32.
define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_even_100:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #34079
; CHECK-NEXT:    movk w8, #20971, lsl #16
; CHECK-NEXT:    dup v2.4s, w8
; CHECK-NEXT:    umull2 v3.2d, v0.4s, v2.4s
; CHECK-NEXT:    umull v2.2d, v0.2s, v2.2s
; CHECK-NEXT:    uzp2 v2.4s, v2.4s, v3.4s
; CHECK-NEXT:    movi v1.4s, #100
; CHECK-NEXT:    ushr v2.4s, v2.4s, #5
; CHECK-NEXT:    mls v0.4s, v2.4s, v1.4s
; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %rem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
  %is_zero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %is_zero to <4 x i32>
  ret <4 x i32> %res
}

; Negative divisors. Unlike srem, urem reads a negative constant as a large
; unsigned value, so the divisor vectors in the next two tests are not splats:
; their expected output loads per-lane constants from the constant pool.
; Odd divisor
; Divisor lanes are 25, -25, -25, 25; as an unsigned i32, -25 is 4294967271.
define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_neg25:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI2_0
; CHECK-NEXT:    adrp x9, .LCPI2_1
; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI2_1]
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %rem = urem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25>
  %is_zero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %is_zero to <4 x i32>
  ret <4 x i32> %res
}

; Even divisors
; Divisor lanes are -100, 100, -100, 100; as an unsigned i32, -100 is 4294967196.
define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_even_neg100:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI3_0
; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT:    adrp x8, .LCPI3_1
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_1]
; CHECK-NEXT:    adrp x8, .LCPI3_2
; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI3_2]
; CHECK-NEXT:    neg v1.4s, v1.4s
; CHECK-NEXT:    adrp x8, .LCPI3_3
; CHECK-NEXT:    ushl v1.4s, v0.4s, v1.4s
; CHECK-NEXT:    umull2 v4.2d, v1.4s, v2.4s
; CHECK-NEXT:    umull v1.2d, v1.2s, v2.2s
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_3]
; CHECK-NEXT:    uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT:    neg v3.4s, v3.4s
; CHECK-NEXT:    ushl v1.4s, v1.4s, v3.4s
; CHECK-NEXT:    mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %rem = urem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
  %is_zero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %is_zero to <4 x i32>
  ret <4 x i32> %res
}

;------------------------------------------------------------------------------;
; Comparison constant has undef elements.
;------------------------------------------------------------------------------;

; Splat divisor 25; lane 2 of the compare constant is undef, the rest are 0.
define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_undef1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #34079
; CHECK-NEXT:    movk w8, #20971, lsl #16
; CHECK-NEXT:    dup v2.4s, w8
; CHECK-NEXT:    umull2 v3.2d, v0.4s, v2.4s
; CHECK-NEXT:    umull v2.2d, v0.2s, v2.2s
; CHECK-NEXT:    uzp2 v2.4s, v2.4s, v3.4s
; CHECK-NEXT:    movi v1.4s, #25
; CHECK-NEXT:    ushr v2.4s, v2.4s, #3
; CHECK-NEXT:    mls v0.4s, v2.4s, v1.4s
; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %rem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
  %is_zero = icmp eq <4 x i32> %rem, <i32 0, i32 0, i32 undef, i32 0>
  %res = zext <4 x i1> %is_zero to <4 x i32>
  ret <4 x i32> %res
}

; Splat divisor 100; lane 2 of the compare constant is undef, the rest are 0.
define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_even_undef1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #34079
; CHECK-NEXT:    movk w8, #20971, lsl #16
; CHECK-NEXT:    dup v2.4s, w8
; CHECK-NEXT:    umull2 v3.2d, v0.4s, v2.4s
; CHECK-NEXT:    umull v2.2d, v0.2s, v2.2s
; CHECK-NEXT:    uzp2 v2.4s, v2.4s, v3.4s
; CHECK-NEXT:    movi v1.4s, #100
; CHECK-NEXT:    ushr v2.4s, v2.4s, #5
; CHECK-NEXT:    mls v0.4s, v2.4s, v1.4s
; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %rem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
  %is_zero = icmp eq <4 x i32> %rem, <i32 0, i32 0, i32 undef, i32 0>
  %res = zext <4 x i1> %is_zero to <4 x i32>
  ret <4 x i32> %res
}

;------------------------------------------------------------------------------;
; Negative tests
;------------------------------------------------------------------------------;

; Divisor 1 compared eq against 0: the expected output is the constant 1 in
; every lane.
define <4 x i32> @test_urem_one_eq(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_one_eq:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.4s, #1
; CHECK-NEXT:    ret
  %rem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
  %is_zero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %is_zero to <4 x i32>
  ret <4 x i32> %res
}
; Divisor 1 compared ne against 0: the expected output is an all-zero vector.
define <4 x i32> @test_urem_one_ne(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_one_ne:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %rem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
  %nonzero = icmp ne <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %nonzero to <4 x i32>
  ret <4 x i32> %res
}

; We can lower remainder of division by powers of two much better elsewhere.
define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_pow2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #15
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %rem = urem <4 x i32> %X, <i32 16, i32 16, i32 16, i32 16>
  %is_zero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %is_zero to <4 x i32>
  ret <4 x i32> %res
}

; We could lower remainder of division by INT_MIN much better elsewhere.
define <4 x i32> @test_urem_int_min(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_int_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic v0.4s, #128, lsl #24
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %rem = urem <4 x i32> %X, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %is_zero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %is_zero to <4 x i32>
  ret <4 x i32> %res
}

; We could lower remainder of division by all-ones much better elsewhere.
; Lane-wise (X urem 4294967295) == 0, with the i1 result widened to i32.
define <4 x i32> @test_urem_allones(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_allones:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg v0.4s, v0.4s
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %rem = urem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
  %is_zero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %is_zero to <4 x i32>
  ret <4 x i32> %res
}