; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -slp-threshold=-200 -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -S | FileCheck %s

;; Four consecutive i32 values are loaded from %arr1, combined lane-by-lane
;; with %y0..%y3, and stored to four consecutive slots of %arr2.
;; Lanes 0, 1 and 3 use 'add nsw'; lane 2 uses 'sdiv'.
;; The CHECK lines expect the scalar code to be left untouched: the bundle
;; must NOT be turned into a vector add + vector sdiv + shuffle.
define void @test_add_sdiv(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: @test_add_sdiv(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP1_0:%.*]] = getelementptr i32, i32* [[ARR1:%.*]], i32 0
; CHECK-NEXT:    [[GEP1_1:%.*]] = getelementptr i32, i32* [[ARR1]], i32 1
; CHECK-NEXT:    [[GEP1_2:%.*]] = getelementptr i32, i32* [[ARR1]], i32 2
; CHECK-NEXT:    [[GEP1_3:%.*]] = getelementptr i32, i32* [[ARR1]], i32 3
; CHECK-NEXT:    [[GEP2_0:%.*]] = getelementptr i32, i32* [[ARR2:%.*]], i32 0
; CHECK-NEXT:    [[GEP2_1:%.*]] = getelementptr i32, i32* [[ARR2]], i32 1
; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, i32* [[ARR2]], i32 2
; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, i32* [[ARR2]], i32 3
; CHECK-NEXT:    [[V0:%.*]] = load i32, i32* [[GEP1_0]]
; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[GEP1_1]]
; CHECK-NEXT:    [[V2:%.*]] = load i32, i32* [[GEP1_2]]
; CHECK-NEXT:    [[V3:%.*]] = load i32, i32* [[GEP1_3]]
; CHECK-NEXT:    [[Y0:%.*]] = add nsw i32 [[A0:%.*]], 1146
; CHECK-NEXT:    [[Y1:%.*]] = add nsw i32 [[A1:%.*]], 146
; CHECK-NEXT:    [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
; CHECK-NEXT:    [[Y3:%.*]] = add nsw i32 [[A3:%.*]], 0
; CHECK-NEXT:    [[RES0:%.*]] = add nsw i32 [[V0]], [[Y0]]
; CHECK-NEXT:    [[RES1:%.*]] = add nsw i32 [[V1]], [[Y1]]
; CHECK-NEXT:    [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]]
; CHECK-NEXT:    [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
; CHECK-NEXT:    store i32 [[RES0]], i32* [[GEP2_0]]
; CHECK-NEXT:    store i32 [[RES1]], i32* [[GEP2_1]]
; CHECK-NEXT:    store i32 [[RES2]], i32* [[GEP2_2]]
; CHECK-NEXT:    store i32 [[RES3]], i32* [[GEP2_3]]
; CHECK-NEXT:    ret void
;
entry:
  ;; Addresses of the four source lanes (%arr1) and destination lanes (%arr2).
  %gep1.0 = getelementptr i32, i32* %arr1, i32 0
  %gep1.1 = getelementptr i32, i32* %arr1, i32 1
  %gep1.2 = getelementptr i32, i32* %arr1, i32 2
  %gep1.3 = getelementptr i32, i32* %arr1, i32 3
  %gep2.0 = getelementptr i32, i32* %arr2, i32 0
  %gep2.1 = getelementptr i32, i32* %arr2, i32 1
  %gep2.2 = getelementptr i32, i32* %arr2, i32 2
  %gep2.3 = getelementptr i32, i32* %arr2, i32 3
  %v0 = load i32, i32* %gep1.0
  %v1 = load i32, i32* %gep1.1
  %v2 = load i32, i32* %gep1.2
  %v3 = load i32, i32* %gep1.3
  ;; Second operands; their values depend on the function arguments and are
  ;; therefore not known to be non-zero.
  %y0 = add nsw i32 %a0, 1146
  %y1 = add nsw i32 %a1, 146
  %y2 = add nsw i32 %a2, 42
  ;; %y3 is zero if %a3 is zero
  %y3 = add nsw i32 %a3, 0
  %res0 = add nsw i32 %v0, %y0
  %res1 = add nsw i32 %v1, %y1
  ;; As such, doing alternate shuffling would be incorrect:
  ;;   %vadd   = add nsw %v[0-3], %y[0-3]
  ;;   %vsdiv  = sdiv %v[0-3], %y[0-3]
  ;;   %result = shuffle %vadd, %vsdiv, <mask>
  ;; would be illegal: the vector sdiv also divides in the lanes that only
  ;; needed an add (e.g. %v3 / %y3), and division by zero is undefined
  ;; behavior that the scalar code below never executes.
  %res2 = sdiv i32 %v2, %y2
  %res3 = add nsw i32 %v3, %y3
  store i32 %res0, i32* %gep2.0
  store i32 %res1, i32* %gep2.1
  store i32 %res2, i32* %gep2.2
  store i32 %res3, i32* %gep2.3
  ret void
}

;; Similar test, but now div/rem is main opcode and not the alternate one. Same issue.
;; Lanes 0-2 use 'urem' and lane 3 uses 'add nsw', with the same loads,
;; second operands and stores as a 4-wide store bundle from %arr1 to %arr2.
;; The CHECK lines expect the scalar code to be left untouched.
define void @test_urem_add(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: @test_urem_add(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP1_0:%.*]] = getelementptr i32, i32* [[ARR1:%.*]], i32 0
; CHECK-NEXT:    [[GEP1_1:%.*]] = getelementptr i32, i32* [[ARR1]], i32 1
; CHECK-NEXT:    [[GEP1_2:%.*]] = getelementptr i32, i32* [[ARR1]], i32 2
; CHECK-NEXT:    [[GEP1_3:%.*]] = getelementptr i32, i32* [[ARR1]], i32 3
; CHECK-NEXT:    [[GEP2_0:%.*]] = getelementptr i32, i32* [[ARR2:%.*]], i32 0
; CHECK-NEXT:    [[GEP2_1:%.*]] = getelementptr i32, i32* [[ARR2]], i32 1
; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, i32* [[ARR2]], i32 2
; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, i32* [[ARR2]], i32 3
; CHECK-NEXT:    [[V0:%.*]] = load i32, i32* [[GEP1_0]]
; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[GEP1_1]]
; CHECK-NEXT:    [[V2:%.*]] = load i32, i32* [[GEP1_2]]
; CHECK-NEXT:    [[V3:%.*]] = load i32, i32* [[GEP1_3]]
; CHECK-NEXT:    [[Y0:%.*]] = add nsw i32 [[A0:%.*]], 1146
; CHECK-NEXT:    [[Y1:%.*]] = add nsw i32 [[A1:%.*]], 146
; CHECK-NEXT:    [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
; CHECK-NEXT:    [[Y3:%.*]] = add nsw i32 [[A3:%.*]], 0
; CHECK-NEXT:    [[RES0:%.*]] = urem i32 [[V0]], [[Y0]]
; CHECK-NEXT:    [[RES1:%.*]] = urem i32 [[V1]], [[Y1]]
; CHECK-NEXT:    [[RES2:%.*]] = urem i32 [[V2]], [[Y2]]
; CHECK-NEXT:    [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
; CHECK-NEXT:    store i32 [[RES0]], i32* [[GEP2_0]]
; CHECK-NEXT:    store i32 [[RES1]], i32* [[GEP2_1]]
; CHECK-NEXT:    store i32 [[RES2]], i32* [[GEP2_2]]
; CHECK-NEXT:    store i32 [[RES3]], i32* [[GEP2_3]]
; CHECK-NEXT:    ret void
;
entry:
  ;; Addresses of the four source lanes (%arr1) and destination lanes (%arr2).
  %gep1.0 = getelementptr i32, i32* %arr1, i32 0
  %gep1.1 = getelementptr i32, i32* %arr1, i32 1
  %gep1.2 = getelementptr i32, i32* %arr1, i32 2
  %gep1.3 = getelementptr i32, i32* %arr1, i32 3
  %gep2.0 = getelementptr i32, i32* %arr2, i32 0
  %gep2.1 = getelementptr i32, i32* %arr2, i32 1
  %gep2.2 = getelementptr i32, i32* %arr2, i32 2
  %gep2.3 = getelementptr i32, i32* %arr2, i32 3
  %v0 = load i32, i32* %gep1.0
  %v1 = load i32, i32* %gep1.1
  %v2 = load i32, i32* %gep1.2
  %v3 = load i32, i32* %gep1.3
  ;; Second operands; their values depend on the function arguments and are
  ;; therefore not known to be non-zero.
  %y0 = add nsw i32 %a0, 1146
  %y1 = add nsw i32 %a1, 146
  %y2 = add nsw i32 %a2, 42
  ;; %y3 is zero if %a3 is zero
  %y3 = add nsw i32 %a3, 0
  %res0 = urem i32 %v0, %y0
  %res1 = urem i32 %v1, %y1
  %res2 = urem i32 %v2, %y2
  ;; As such, doing alternate shuffling would be incorrect:
  ;;   %vurem  = urem %v[0-3], %y[0-3]
  ;;   %vadd   = add nsw %v[0-3], %y[0-3]
  ;;   %result = shuffle %vurem, %vadd, <mask>
  ;; would be illegal: the vector urem would also compute %v3 urem %y3 in
  ;; lane 3, and a zero divisor there is undefined behavior that the scalar
  ;; add below never triggers.
  %res3 = add nsw i32 %v3, %y3
  store i32 %res0, i32* %gep2.0
  store i32 %res1, i32* %gep2.1
  store i32 %res2, i32* %gep2.2
  store i32 %res3, i32* %gep2.3
  ret void
}