; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -slp-threshold=-10 | FileCheck %s --check-prefix=THRESHOLD

@n = external local_unnamed_addr global i32, align 4
@arr = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@arr1 = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@res = external local_unnamed_addr global float, align 4

; Horizontal fadd reduction of arr[i]*arr1[i] (i = 0..3) used twice, plus a
; scalar term; checks partial <2 x float> vectorization of the products.
define float @baz() {
; CHECK-LABEL: @baz(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]]
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]]
; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV]]
; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float [[TMP4]], [[ADD7]]
; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float [[TMP5]], [[ADD19]]
; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP9]], [[ADD19_1]]
; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP10]], [[ADD19_2]]
; CHECK-NEXT: store float [[ADD19_3]], float* @res, align 4
; CHECK-NEXT: ret float [[ADD19_3]]
;
; THRESHOLD-LABEL: @baz(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP5]], [[ADD]]
; THRESHOLD-NEXT: [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
; THRESHOLD-NEXT: [[TMP7:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP9]], [[ADD_1]]
; THRESHOLD-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP10]], [[ADD_2]]
; THRESHOLD-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV]]
; THRESHOLD-NEXT: [[ADD19:%.*]] = fadd fast float [[TMP4]], [[ADD7]]
; THRESHOLD-NEXT: [[ADD19_1:%.*]] = fadd fast float [[TMP5]], [[ADD19]]
; THRESHOLD-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP9]], [[ADD19_1]]
; THRESHOLD-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP10]], [[ADD19_2]]
; THRESHOLD-NEXT: store float [[ADD19_3]], float* @res, align 4
; THRESHOLD-NEXT: ret float [[ADD19_3]]
;
entry:
  %0 = load i32, i32* @n, align 4
  %mul = mul nsw i32 %0, 3
  %conv = sitofp i32 %mul to float
  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
  %mul4 = fmul fast float %2, %1
  %add = fadd fast float %mul4, %conv
  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
  %mul4.1 = fmul fast float %4, %3
  %add.1 = fadd fast float %mul4.1, %add
  %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
  %mul4.2 = fmul fast float %6, %5
  %add.2 = fadd fast float %mul4.2, %add.1
  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
  %8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
  %mul4.3 = fmul fast float %8, %7
  %add.3 = fadd fast float %mul4.3, %add.2
  %add7 = fadd fast float %add.3, %conv
  %add19 = fadd fast float %mul4, %add7
  %add19.1 = fadd fast float %mul4.1, %add19
  %add19.2 = fadd fast float %mul4.2, %add19.1
  %add19.3 = fadd fast float %mul4.3, %add19.2
  store float %add19.3, float* @res, align 4
  ret float %add19.3
}

; Eight-lane version of the pattern above; checks a full <8 x float>
; shuffle-based fadd reduction with two extra scalar operands (OP_EXTRA).
define float @bazz() {
; CHECK-LABEL: @bazz(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
; CHECK-NEXT: store float [[OP_EXTRA5]], float* @res, align 4
; CHECK-NEXT: ret float [[OP_EXTRA5]]
;
; THRESHOLD-LABEL: @bazz(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
; THRESHOLD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
; THRESHOLD-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; THRESHOLD-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; THRESHOLD-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
; THRESHOLD-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
; THRESHOLD-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
; THRESHOLD-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]]
; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
; THRESHOLD-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
; THRESHOLD-NEXT: store float [[OP_EXTRA5]], float* @res, align 4
; THRESHOLD-NEXT: ret float [[OP_EXTRA5]]
;
entry:
  %0 = load i32, i32* @n, align 4
  %mul = mul nsw i32 %0, 3
  %conv = sitofp i32 %mul to float
  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
  %mul4 = fmul fast float %2, %1
  %add = fadd fast float %mul4, %conv
  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
  %mul4.1 = fmul fast float %4, %3
  %add.1 = fadd fast float %mul4.1, %add
  %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
  %mul4.2 = fmul fast float %6, %5
  %add.2 = fadd fast float %mul4.2, %add.1
  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
  %8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
  %mul4.3 = fmul fast float %8, %7
  %add.3 = fadd fast float %mul4.3, %add.2
  %mul5 = shl nsw i32 %0, 2
  %conv6 = sitofp i32 %mul5 to float
  %add7 = fadd fast float %add.3, %conv6
  %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 4), align 16
  %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 4), align 16
  %mul18 = fmul fast float %10, %9
  %add19 = fadd fast float %mul18, %add7
  %11 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 5), align 4
  %12 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 5), align 4
  %mul18.1 = fmul fast float %12, %11
  %add19.1 = fadd fast float %mul18.1, %add19
  %13 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 6), align 8
  %14 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 6), align 8
  %mul18.2 = fmul fast float %14, %13
  %add19.2 = fadd fast float %mul18.2, %add19.1
  %15 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 7), align 4
  %16 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 7), align 4
  %mul18.3 = fmul fast float %16, %15
  %add19.3 = fadd fast float %mul18.3, %add19.2
  store float %add19.3, float* @res, align 4
  ret float %add19.3
}

; Pure 4-lane fadd reduction of products, result scaled by a scalar factor.
define float @bazzz() {
; CHECK-LABEL: @bazzz(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
; CHECK-NEXT: store float [[TMP8]], float* @res, align 4
; CHECK-NEXT: ret float [[TMP8]]
;
; THRESHOLD-LABEL: @bazzz(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
; THRESHOLD-NEXT: store float [[TMP8]], float* @res, align 4
; THRESHOLD-NEXT: ret float [[TMP8]]
;
entry:
  %0 = load i32, i32* @n, align 4
  %conv = sitofp i32 %0 to float
  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
  %mul = fmul fast float %2, %1
  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
  %mul.1 = fmul fast float %4, %3
  %5 = fadd fast float %mul.1, %mul
  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
  %mul.2 = fmul fast float %7, %6
  %8 = fadd fast float %mul.2, %5
  %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
  %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
  %mul.3 = fmul fast float %10, %9
  %11 = fadd fast float %mul.3, %8
  %12 = fmul fast float %conv, %11
  store float %12, float* @res, align 4
  ret float %12
}

; Same reduction as @bazzz but the result is converted to i32 and stored to @n.
define i32 @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
; CHECK-NEXT: store i32 [[CONV4]], i32* @n, align 4
; CHECK-NEXT: ret i32 [[CONV4]]
;
; THRESHOLD-LABEL: @foo(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
; THRESHOLD-NEXT: store i32 [[CONV4]], i32* @n, align 4
; THRESHOLD-NEXT: ret i32 [[CONV4]]
;
entry:
  %0 = load i32, i32* @n, align 4
  %conv = sitofp i32 %0 to float
  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
  %mul = fmul fast float %2, %1
  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
  %mul.1 = fmul fast float %4, %3
  %5 = fadd fast float %mul.1, %mul
  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
  %mul.2 = fmul fast float %7, %6
  %8 = fadd fast float %mul.2, %5
  %9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
  %10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
  %mul.3 = fmul fast float %10, %9
  %11 = fadd fast float %mul.3, %8
  %12 = fmul fast float %conv, %11
  %conv4 = fptosi float %12 to i32
  store i32 %conv4, i32* @n, align 4
  ret i32 %conv4
}

; Horizontal fmax (fcmp ogt + select) reduction over four products; checks the
; shuffle-based min/max reduction form.
define float @bar() {
; CHECK-LABEL: @bar(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float undef, undef
; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef
; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef
; CHECK-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef
; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], undef
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0
; CHECK-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef
; CHECK-NEXT: store float [[TMP3]], float* @res, align 4
; CHECK-NEXT: ret float [[TMP3]]
;
; THRESHOLD-LABEL: @bar(
; THRESHOLD-NEXT: entry:
; THRESHOLD-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
; THRESHOLD-NEXT: [[CMP4:%.*]] = fcmp fast ogt float undef, undef
; THRESHOLD-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef
; THRESHOLD-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef
; THRESHOLD-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef
; THRESHOLD-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], undef
; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]]
; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0
; THRESHOLD-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef
; THRESHOLD-NEXT: store float [[TMP3]], float* @res, align 4
; THRESHOLD-NEXT: ret float [[TMP3]]
;
entry:
  %0 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
  %1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
  %mul = fmul fast float %1, %0
  %2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
  %3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
  %mul3 = fmul fast float %3, %2
  %cmp4 = fcmp fast ogt float %mul, %mul3
  %max.0.mul3 = select i1 %cmp4, float %mul, float %mul3
  %4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
  %5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
  %mul3.1 = fmul fast float %5, %4
  %cmp4.1 = fcmp fast ogt float %max.0.mul3, %mul3.1
  %max.0.mul3.1 = select i1 %cmp4.1, float %max.0.mul3, float %mul3.1
  %6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
  %7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
  %mul3.2 = fmul fast float %7, %6
  %cmp4.2 = fcmp fast ogt float %max.0.mul3.1, %mul3.2
  %max.0.mul3.2 = select i1 %cmp4.2, float %max.0.mul3.1, float %mul3.2
  store float %max.0.mul3.2, float* @res, align 4
  ret float %max.0.mul3.2
}

; Wide (48-element) fadd reduction over a pointer argument, split into a
; <16 x float> and a <32 x float> load. (Continues past this chunk.)
define float @f(float* nocapture readonly %x) {
; CHECK-LABEL: @f(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef
; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
; CHECK-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
; CHECK-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32
; CHECK-NEXT: [[ARRAYIDX_33:%.*]] = getelementptr inbounds float, float* [[X]], i64 33
; CHECK-NEXT: [[ARRAYIDX_34:%.*]] = getelementptr inbounds float, float* [[X]], i64 34
; CHECK-NEXT: [[ARRAYIDX_35:%.*]] = getelementptr inbounds float, float* [[X]], i64 35
; CHECK-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds float, float* [[X]], i64 36
; CHECK-NEXT: [[ARRAYIDX_37:%.*]] = getelementptr inbounds float, float* [[X]], i64 37
; CHECK-NEXT: [[ARRAYIDX_38:%.*]] = getelementptr inbounds float, float* [[X]], i64 38
; CHECK-NEXT: [[ARRAYIDX_39:%.*]] = getelementptr inbounds float, float* [[X]], i64 39
; CHECK-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds float, float* [[X]], i64 40
; CHECK-NEXT: [[ARRAYIDX_41:%.*]] = getelementptr inbounds float, float* [[X]], i64 41
; CHECK-NEXT: [[ARRAYIDX_42:%.*]] = getelementptr inbounds float, float* [[X]], i64 42
; CHECK-NEXT: [[ARRAYIDX_43:%.*]] = getelementptr inbounds float, float* [[X]], i64 43
; CHECK-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds float, float* [[X]], i64 44
; CHECK-NEXT: [[ARRAYIDX_45:%.*]] = getelementptr inbounds float, float* [[X]], i64 45
; CHECK-NEXT: [[ARRAYIDX_46:%.*]] = getelementptr inbounds float, float* [[X]], i64 46
; CHECK-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
; CHECK-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]]
; CHECK-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]]
; CHECK-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]]
; CHECK-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]]
; CHECK-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]]
; CHECK-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]]
; CHECK-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]]
; CHECK-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]]
; CHECK-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]]
; CHECK-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]]
; CHECK-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]]
; CHECK-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]]
; CHECK-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]]
; CHECK-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]]
; CHECK-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32
x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 496; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]] 497; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 498; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]] 499; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 500; CHECK-NEXT: [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]] 501; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 502; CHECK-NEXT: 
[[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] 503; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 504; CHECK-NEXT: [[RDX_SHUF9:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 505; CHECK-NEXT: [[BIN_RDX10:%.*]] = fadd fast <16 x float> [[TMP1]], [[RDX_SHUF9]] 506; CHECK-NEXT: [[RDX_SHUF11:%.*]] = shufflevector <16 x float> [[BIN_RDX10]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 507; CHECK-NEXT: [[BIN_RDX12:%.*]] = fadd fast <16 x float> [[BIN_RDX10]], [[RDX_SHUF11]] 508; CHECK-NEXT: [[RDX_SHUF13:%.*]] = shufflevector <16 x float> [[BIN_RDX12]], <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 509; CHECK-NEXT: [[BIN_RDX14:%.*]] = fadd fast <16 x float> [[BIN_RDX12]], [[RDX_SHUF13]] 510; CHECK-NEXT: [[RDX_SHUF15:%.*]] = shufflevector <16 x float> [[BIN_RDX14]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 511; CHECK-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]] 512; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0 513; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]] 514; CHECK-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]] 515; CHECK-NEXT: ret float [[OP_RDX]] 516; 517; THRESHOLD-LABEL: @f( 518; THRESHOLD-NEXT: entry: 519; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* 
[[X:%.*]], i64 1 520; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 521; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 522; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 523; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 524; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 525; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 526; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 527; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 528; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 529; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 530; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 531; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 532; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 533; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 534; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>* 535; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 536; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef 537; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] 538; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] 539; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] 540; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] 541; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] 542; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] 543; THRESHOLD-NEXT: [[ADD_8:%.*]] 
= fadd fast float undef, [[ADD_7]] 544; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] 545; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] 546; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] 547; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] 548; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] 549; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] 550; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] 551; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 552; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 553; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 554; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 555; THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 556; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 557; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 558; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 559; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 560; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 561; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 562; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 563; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 564; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 565; THRESHOLD-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 566; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = 
getelementptr inbounds float, float* [[X]], i64 31 567; THRESHOLD-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds float, float* [[X]], i64 32 568; THRESHOLD-NEXT: [[ARRAYIDX_33:%.*]] = getelementptr inbounds float, float* [[X]], i64 33 569; THRESHOLD-NEXT: [[ARRAYIDX_34:%.*]] = getelementptr inbounds float, float* [[X]], i64 34 570; THRESHOLD-NEXT: [[ARRAYIDX_35:%.*]] = getelementptr inbounds float, float* [[X]], i64 35 571; THRESHOLD-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds float, float* [[X]], i64 36 572; THRESHOLD-NEXT: [[ARRAYIDX_37:%.*]] = getelementptr inbounds float, float* [[X]], i64 37 573; THRESHOLD-NEXT: [[ARRAYIDX_38:%.*]] = getelementptr inbounds float, float* [[X]], i64 38 574; THRESHOLD-NEXT: [[ARRAYIDX_39:%.*]] = getelementptr inbounds float, float* [[X]], i64 39 575; THRESHOLD-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds float, float* [[X]], i64 40 576; THRESHOLD-NEXT: [[ARRAYIDX_41:%.*]] = getelementptr inbounds float, float* [[X]], i64 41 577; THRESHOLD-NEXT: [[ARRAYIDX_42:%.*]] = getelementptr inbounds float, float* [[X]], i64 42 578; THRESHOLD-NEXT: [[ARRAYIDX_43:%.*]] = getelementptr inbounds float, float* [[X]], i64 43 579; THRESHOLD-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds float, float* [[X]], i64 44 580; THRESHOLD-NEXT: [[ARRAYIDX_45:%.*]] = getelementptr inbounds float, float* [[X]], i64 45 581; THRESHOLD-NEXT: [[ARRAYIDX_46:%.*]] = getelementptr inbounds float, float* [[X]], i64 46 582; THRESHOLD-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47 583; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* 584; THRESHOLD-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 585; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] 586; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] 587; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] 588; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float 
undef, [[ADD_18]] 589; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] 590; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] 591; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] 592; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] 593; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] 594; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] 595; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] 596; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] 597; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] 598; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] 599; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] 600; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] 601; THRESHOLD-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]] 602; THRESHOLD-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]] 603; THRESHOLD-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]] 604; THRESHOLD-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]] 605; THRESHOLD-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]] 606; THRESHOLD-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]] 607; THRESHOLD-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]] 608; THRESHOLD-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]] 609; THRESHOLD-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]] 610; THRESHOLD-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]] 611; THRESHOLD-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]] 612; THRESHOLD-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]] 613; THRESHOLD-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]] 614; THRESHOLD-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]] 615; THRESHOLD-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]] 616; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] 
= shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 617; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]] 618; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 619; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]] 620; THRESHOLD-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 621; THRESHOLD-NEXT: [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]] 622; THRESHOLD-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 623; 
THRESHOLD-NEXT: [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]] 624; THRESHOLD-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 625; THRESHOLD-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] 626; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 627; THRESHOLD-NEXT: [[RDX_SHUF9:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 628; THRESHOLD-NEXT: [[BIN_RDX10:%.*]] = fadd fast <16 x float> [[TMP1]], [[RDX_SHUF9]] 629; THRESHOLD-NEXT: [[RDX_SHUF11:%.*]] = shufflevector <16 x float> [[BIN_RDX10]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 630; THRESHOLD-NEXT: [[BIN_RDX12:%.*]] = fadd fast <16 x float> [[BIN_RDX10]], [[RDX_SHUF11]] 631; THRESHOLD-NEXT: [[RDX_SHUF13:%.*]] = shufflevector <16 x float> [[BIN_RDX12]], <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 632; THRESHOLD-NEXT: [[BIN_RDX14:%.*]] = fadd fast <16 x float> [[BIN_RDX12]], [[RDX_SHUF13]] 633; THRESHOLD-NEXT: [[RDX_SHUF15:%.*]] = shufflevector <16 x float> [[BIN_RDX14]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 634; THRESHOLD-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]] 635; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0 636; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]] 637; THRESHOLD-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]] 638; THRESHOLD-NEXT: ret float [[OP_RDX]] 639; 640 entry: 641 %0 = load float, float* %x, align 4 642 %arrayidx.1 = getelementptr inbounds float, float* %x, i64 1 643 %1 = load float, float* %arrayidx.1, align 4 644 %add.1 = fadd fast float %1, %0 645 %arrayidx.2 = getelementptr inbounds float, float* %x, i64 2 646 %2 = load float, float* %arrayidx.2, align 4 647 %add.2 = fadd fast float %2, %add.1 648 %arrayidx.3 = getelementptr inbounds float, float* %x, i64 3 649 %3 = load float, float* %arrayidx.3, align 4 650 %add.3 = fadd fast float %3, %add.2 651 %arrayidx.4 = getelementptr inbounds float, float* %x, i64 4 652 %4 = load float, float* %arrayidx.4, align 4 653 %add.4 = fadd fast float %4, %add.3 654 %arrayidx.5 = getelementptr inbounds float, float* %x, i64 5 655 %5 = load float, float* %arrayidx.5, align 4 656 %add.5 = fadd fast float %5, %add.4 657 %arrayidx.6 = getelementptr inbounds float, float* %x, i64 6 658 %6 = load float, float* %arrayidx.6, align 4 659 %add.6 = fadd fast float %6, %add.5 660 %arrayidx.7 = getelementptr inbounds float, float* %x, i64 7 661 %7 = load float, float* %arrayidx.7, align 4 662 %add.7 = fadd fast float %7, %add.6 663 %arrayidx.8 = getelementptr inbounds float, float* %x, i64 8 664 %8 = load float, float* %arrayidx.8, align 4 665 %add.8 = fadd fast float %8, %add.7 666 %arrayidx.9 = getelementptr inbounds float, float* %x, i64 9 667 %9 = load float, float* %arrayidx.9, align 4 668 %add.9 = fadd fast float %9, %add.8 669 %arrayidx.10 = getelementptr inbounds float, float* %x, i64 10 670 %10 = load 
float, float* %arrayidx.10, align 4 671 %add.10 = fadd fast float %10, %add.9 672 %arrayidx.11 = getelementptr inbounds float, float* %x, i64 11 673 %11 = load float, float* %arrayidx.11, align 4 674 %add.11 = fadd fast float %11, %add.10 675 %arrayidx.12 = getelementptr inbounds float, float* %x, i64 12 676 %12 = load float, float* %arrayidx.12, align 4 677 %add.12 = fadd fast float %12, %add.11 678 %arrayidx.13 = getelementptr inbounds float, float* %x, i64 13 679 %13 = load float, float* %arrayidx.13, align 4 680 %add.13 = fadd fast float %13, %add.12 681 %arrayidx.14 = getelementptr inbounds float, float* %x, i64 14 682 %14 = load float, float* %arrayidx.14, align 4 683 %add.14 = fadd fast float %14, %add.13 684 %arrayidx.15 = getelementptr inbounds float, float* %x, i64 15 685 %15 = load float, float* %arrayidx.15, align 4 686 %add.15 = fadd fast float %15, %add.14 687 %arrayidx.16 = getelementptr inbounds float, float* %x, i64 16 688 %16 = load float, float* %arrayidx.16, align 4 689 %add.16 = fadd fast float %16, %add.15 690 %arrayidx.17 = getelementptr inbounds float, float* %x, i64 17 691 %17 = load float, float* %arrayidx.17, align 4 692 %add.17 = fadd fast float %17, %add.16 693 %arrayidx.18 = getelementptr inbounds float, float* %x, i64 18 694 %18 = load float, float* %arrayidx.18, align 4 695 %add.18 = fadd fast float %18, %add.17 696 %arrayidx.19 = getelementptr inbounds float, float* %x, i64 19 697 %19 = load float, float* %arrayidx.19, align 4 698 %add.19 = fadd fast float %19, %add.18 699 %arrayidx.20 = getelementptr inbounds float, float* %x, i64 20 700 %20 = load float, float* %arrayidx.20, align 4 701 %add.20 = fadd fast float %20, %add.19 702 %arrayidx.21 = getelementptr inbounds float, float* %x, i64 21 703 %21 = load float, float* %arrayidx.21, align 4 704 %add.21 = fadd fast float %21, %add.20 705 %arrayidx.22 = getelementptr inbounds float, float* %x, i64 22 706 %22 = load float, float* %arrayidx.22, align 4 707 %add.22 = fadd fast float 
%22, %add.21 708 %arrayidx.23 = getelementptr inbounds float, float* %x, i64 23 709 %23 = load float, float* %arrayidx.23, align 4 710 %add.23 = fadd fast float %23, %add.22 711 %arrayidx.24 = getelementptr inbounds float, float* %x, i64 24 712 %24 = load float, float* %arrayidx.24, align 4 713 %add.24 = fadd fast float %24, %add.23 714 %arrayidx.25 = getelementptr inbounds float, float* %x, i64 25 715 %25 = load float, float* %arrayidx.25, align 4 716 %add.25 = fadd fast float %25, %add.24 717 %arrayidx.26 = getelementptr inbounds float, float* %x, i64 26 718 %26 = load float, float* %arrayidx.26, align 4 719 %add.26 = fadd fast float %26, %add.25 720 %arrayidx.27 = getelementptr inbounds float, float* %x, i64 27 721 %27 = load float, float* %arrayidx.27, align 4 722 %add.27 = fadd fast float %27, %add.26 723 %arrayidx.28 = getelementptr inbounds float, float* %x, i64 28 724 %28 = load float, float* %arrayidx.28, align 4 725 %add.28 = fadd fast float %28, %add.27 726 %arrayidx.29 = getelementptr inbounds float, float* %x, i64 29 727 %29 = load float, float* %arrayidx.29, align 4 728 %add.29 = fadd fast float %29, %add.28 729 %arrayidx.30 = getelementptr inbounds float, float* %x, i64 30 730 %30 = load float, float* %arrayidx.30, align 4 731 %add.30 = fadd fast float %30, %add.29 732 %arrayidx.31 = getelementptr inbounds float, float* %x, i64 31 733 %31 = load float, float* %arrayidx.31, align 4 734 %add.31 = fadd fast float %31, %add.30 735 %arrayidx.32 = getelementptr inbounds float, float* %x, i64 32 736 %32 = load float, float* %arrayidx.32, align 4 737 %add.32 = fadd fast float %32, %add.31 738 %arrayidx.33 = getelementptr inbounds float, float* %x, i64 33 739 %33 = load float, float* %arrayidx.33, align 4 740 %add.33 = fadd fast float %33, %add.32 741 %arrayidx.34 = getelementptr inbounds float, float* %x, i64 34 742 %34 = load float, float* %arrayidx.34, align 4 743 %add.34 = fadd fast float %34, %add.33 744 %arrayidx.35 = getelementptr inbounds float, 
float* %x, i64 35 745 %35 = load float, float* %arrayidx.35, align 4 746 %add.35 = fadd fast float %35, %add.34 747 %arrayidx.36 = getelementptr inbounds float, float* %x, i64 36 748 %36 = load float, float* %arrayidx.36, align 4 749 %add.36 = fadd fast float %36, %add.35 750 %arrayidx.37 = getelementptr inbounds float, float* %x, i64 37 751 %37 = load float, float* %arrayidx.37, align 4 752 %add.37 = fadd fast float %37, %add.36 753 %arrayidx.38 = getelementptr inbounds float, float* %x, i64 38 754 %38 = load float, float* %arrayidx.38, align 4 755 %add.38 = fadd fast float %38, %add.37 756 %arrayidx.39 = getelementptr inbounds float, float* %x, i64 39 757 %39 = load float, float* %arrayidx.39, align 4 758 %add.39 = fadd fast float %39, %add.38 759 %arrayidx.40 = getelementptr inbounds float, float* %x, i64 40 760 %40 = load float, float* %arrayidx.40, align 4 761 %add.40 = fadd fast float %40, %add.39 762 %arrayidx.41 = getelementptr inbounds float, float* %x, i64 41 763 %41 = load float, float* %arrayidx.41, align 4 764 %add.41 = fadd fast float %41, %add.40 765 %arrayidx.42 = getelementptr inbounds float, float* %x, i64 42 766 %42 = load float, float* %arrayidx.42, align 4 767 %add.42 = fadd fast float %42, %add.41 768 %arrayidx.43 = getelementptr inbounds float, float* %x, i64 43 769 %43 = load float, float* %arrayidx.43, align 4 770 %add.43 = fadd fast float %43, %add.42 771 %arrayidx.44 = getelementptr inbounds float, float* %x, i64 44 772 %44 = load float, float* %arrayidx.44, align 4 773 %add.44 = fadd fast float %44, %add.43 774 %arrayidx.45 = getelementptr inbounds float, float* %x, i64 45 775 %45 = load float, float* %arrayidx.45, align 4 776 %add.45 = fadd fast float %45, %add.44 777 %arrayidx.46 = getelementptr inbounds float, float* %x, i64 46 778 %46 = load float, float* %arrayidx.46, align 4 779 %add.46 = fadd fast float %46, %add.45 780 %arrayidx.47 = getelementptr inbounds float, float* %x, i64 47 781 %47 = load float, float* %arrayidx.47, align 
4 782 %add.47 = fadd fast float %47, %add.46 783 ret float %add.47 784} 785 786define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) { 787; CHECK-LABEL: @f1( 788; CHECK-NEXT: entry: 789; CHECK-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]] 790; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float 791; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 792; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 793; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 794; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 795; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 796; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 797; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 798; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 799; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 800; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 801; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 802; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 803; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 804; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 805; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 806; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 807; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 808; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 809; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* 
[[X]], i64 19 810; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 811; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 812; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 813; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 814; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 815; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 816; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 817; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 818; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 819; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 820; CHECK-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 821; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 822; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* 823; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 824; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] 825; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] 826; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] 827; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] 828; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] 829; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] 830; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] 831; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] 832; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] 833; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] 834; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] 835; CHECK-NEXT: 
[[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] 836; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] 837; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] 838; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] 839; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] 840; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] 841; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] 842; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] 843; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] 844; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] 845; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] 846; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] 847; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] 848; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] 849; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] 850; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] 851; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] 852; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] 853; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] 854; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] 855; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 856; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]] 857; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 
15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 858; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]] 859; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 860; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]] 861; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 862; CHECK-NEXT: [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]] 863; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 864; CHECK-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] 865; CHECK-NEXT: 
[[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 866; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] 867; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] 868; CHECK-NEXT: ret float [[OP_EXTRA]] 869; 870; THRESHOLD-LABEL: @f1( 871; THRESHOLD-NEXT: entry: 872; THRESHOLD-NEXT: [[REM:%.*]] = srem i32 [[A:%.*]], [[B:%.*]] 873; THRESHOLD-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to float 874; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 875; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 876; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 877; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 878; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 879; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 880; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 881; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 882; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 883; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 884; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 885; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 886; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 887; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 888; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 889; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 890; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, 
float* [[X]], i64 17 891; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 892; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 893; THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 894; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 895; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 896; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 897; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 898; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 899; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 900; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 901; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 902; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 903; THRESHOLD-NEXT: [[ARRAYIDX_30:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 904; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 905; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* 906; THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 907; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] 908; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] 909; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] 910; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] 911; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] 912; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] 913; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] 914; 
THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] 915; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] 916; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] 917; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] 918; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] 919; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] 920; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] 921; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] 922; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] 923; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] 924; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] 925; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] 926; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] 927; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] 928; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] 929; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] 930; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] 931; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] 932; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] 933; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] 934; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] 935; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] 936; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] 937; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] 938; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 939; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]] 940; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 941; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <32 x float> [[BIN_RDX]], [[RDX_SHUF1]] 942; THRESHOLD-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <32 x float> [[BIN_RDX2]], <32 x float> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 943; THRESHOLD-NEXT: [[BIN_RDX4:%.*]] = fadd fast <32 x float> [[BIN_RDX2]], [[RDX_SHUF3]] 944; THRESHOLD-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <32 x float> [[BIN_RDX4]], <32 x float> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 945; THRESHOLD-NEXT: [[BIN_RDX6:%.*]] = fadd fast <32 x float> [[BIN_RDX4]], [[RDX_SHUF5]] 946; THRESHOLD-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x float> [[BIN_RDX6]], <32 x float> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 947; THRESHOLD-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] 948; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 949; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] 950; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] 951; THRESHOLD-NEXT: ret float [[OP_EXTRA]] 952; 953 entry: 954 %rem = srem i32 %a, %b 955 %conv = sitofp i32 %rem to float 956 %0 = load float, float* %x, align 4 957 %add = fadd fast float %0, %conv 958 %arrayidx.1 = getelementptr inbounds float, float* %x, i64 1 959 %1 = load float, float* %arrayidx.1, align 4 960 %add.1 = fadd fast float %1, %add 961 %arrayidx.2 = getelementptr inbounds float, float* %x, i64 2 962 %2 = load float, float* %arrayidx.2, align 4 963 %add.2 = fadd fast float %2, %add.1 964 %arrayidx.3 = getelementptr inbounds float, float* %x, i64 3 965 %3 = load float, float* %arrayidx.3, align 4 966 %add.3 = fadd fast float %3, %add.2 967 %arrayidx.4 = getelementptr inbounds float, float* %x, i64 4 968 %4 = load float, float* %arrayidx.4, align 4 969 %add.4 = fadd fast float %4, %add.3 970 %arrayidx.5 = getelementptr inbounds float, float* %x, i64 5 971 %5 = load float, float* %arrayidx.5, align 4 972 %add.5 = fadd fast float %5, %add.4 973 %arrayidx.6 = getelementptr inbounds float, float* %x, i64 6 974 %6 = load float, float* %arrayidx.6, align 4 975 %add.6 = fadd fast float %6, %add.5 976 %arrayidx.7 = getelementptr inbounds float, float* %x, i64 7 977 %7 = load float, float* %arrayidx.7, align 4 978 %add.7 = fadd fast float %7, %add.6 979 %arrayidx.8 = getelementptr inbounds float, float* %x, i64 8 980 %8 = load float, float* 
%arrayidx.8, align 4 981 %add.8 = fadd fast float %8, %add.7 982 %arrayidx.9 = getelementptr inbounds float, float* %x, i64 9 983 %9 = load float, float* %arrayidx.9, align 4 984 %add.9 = fadd fast float %9, %add.8 985 %arrayidx.10 = getelementptr inbounds float, float* %x, i64 10 986 %10 = load float, float* %arrayidx.10, align 4 987 %add.10 = fadd fast float %10, %add.9 988 %arrayidx.11 = getelementptr inbounds float, float* %x, i64 11 989 %11 = load float, float* %arrayidx.11, align 4 990 %add.11 = fadd fast float %11, %add.10 991 %arrayidx.12 = getelementptr inbounds float, float* %x, i64 12 992 %12 = load float, float* %arrayidx.12, align 4 993 %add.12 = fadd fast float %12, %add.11 994 %arrayidx.13 = getelementptr inbounds float, float* %x, i64 13 995 %13 = load float, float* %arrayidx.13, align 4 996 %add.13 = fadd fast float %13, %add.12 997 %arrayidx.14 = getelementptr inbounds float, float* %x, i64 14 998 %14 = load float, float* %arrayidx.14, align 4 999 %add.14 = fadd fast float %14, %add.13 1000 %arrayidx.15 = getelementptr inbounds float, float* %x, i64 15 1001 %15 = load float, float* %arrayidx.15, align 4 1002 %add.15 = fadd fast float %15, %add.14 1003 %arrayidx.16 = getelementptr inbounds float, float* %x, i64 16 1004 %16 = load float, float* %arrayidx.16, align 4 1005 %add.16 = fadd fast float %16, %add.15 1006 %arrayidx.17 = getelementptr inbounds float, float* %x, i64 17 1007 %17 = load float, float* %arrayidx.17, align 4 1008 %add.17 = fadd fast float %17, %add.16 1009 %arrayidx.18 = getelementptr inbounds float, float* %x, i64 18 1010 %18 = load float, float* %arrayidx.18, align 4 1011 %add.18 = fadd fast float %18, %add.17 1012 %arrayidx.19 = getelementptr inbounds float, float* %x, i64 19 1013 %19 = load float, float* %arrayidx.19, align 4 1014 %add.19 = fadd fast float %19, %add.18 1015 %arrayidx.20 = getelementptr inbounds float, float* %x, i64 20 1016 %20 = load float, float* %arrayidx.20, align 4 1017 %add.20 = fadd fast float %20, 
%add.19 1018 %arrayidx.21 = getelementptr inbounds float, float* %x, i64 21 1019 %21 = load float, float* %arrayidx.21, align 4 1020 %add.21 = fadd fast float %21, %add.20 1021 %arrayidx.22 = getelementptr inbounds float, float* %x, i64 22 1022 %22 = load float, float* %arrayidx.22, align 4 1023 %add.22 = fadd fast float %22, %add.21 1024 %arrayidx.23 = getelementptr inbounds float, float* %x, i64 23 1025 %23 = load float, float* %arrayidx.23, align 4 1026 %add.23 = fadd fast float %23, %add.22 1027 %arrayidx.24 = getelementptr inbounds float, float* %x, i64 24 1028 %24 = load float, float* %arrayidx.24, align 4 1029 %add.24 = fadd fast float %24, %add.23 1030 %arrayidx.25 = getelementptr inbounds float, float* %x, i64 25 1031 %25 = load float, float* %arrayidx.25, align 4 1032 %add.25 = fadd fast float %25, %add.24 1033 %arrayidx.26 = getelementptr inbounds float, float* %x, i64 26 1034 %26 = load float, float* %arrayidx.26, align 4 1035 %add.26 = fadd fast float %26, %add.25 1036 %arrayidx.27 = getelementptr inbounds float, float* %x, i64 27 1037 %27 = load float, float* %arrayidx.27, align 4 1038 %add.27 = fadd fast float %27, %add.26 1039 %arrayidx.28 = getelementptr inbounds float, float* %x, i64 28 1040 %28 = load float, float* %arrayidx.28, align 4 1041 %add.28 = fadd fast float %28, %add.27 1042 %arrayidx.29 = getelementptr inbounds float, float* %x, i64 29 1043 %29 = load float, float* %arrayidx.29, align 4 1044 %add.29 = fadd fast float %29, %add.28 1045 %arrayidx.30 = getelementptr inbounds float, float* %x, i64 30 1046 %30 = load float, float* %arrayidx.30, align 4 1047 %add.30 = fadd fast float %30, %add.29 1048 %arrayidx.31 = getelementptr inbounds float, float* %x, i64 31 1049 %31 = load float, float* %arrayidx.31, align 4 1050 %add.31 = fadd fast float %31, %add.30 1051 ret float %add.31 1052} 1053 1054define float @loadadd31(float* nocapture readonly %x) { 1055; CHECK-LABEL: @loadadd31( 1056; CHECK-NEXT: entry: 1057; CHECK-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds float, float* [[X:%.*]], i64 1 1058; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 1059; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 1060; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 1061; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]] 1062; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 1063; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 1064; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 1065; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 1066; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* 1067; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 1068; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] 1069; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] 1070; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] 1071; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] 1072; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 1073; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 1074; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 1075; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 1076; CHECK-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 1077; CHECK-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 1078; CHECK-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 1079; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 1080; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* 1081; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, 
<8 x float>* [[TMP4]], align 4 1082; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] 1083; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] 1084; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] 1085; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] 1086; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] 1087; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] 1088; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] 1089; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] 1090; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 1091; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 1092; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 1093; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 1094; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 1095; CHECK-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 1096; CHECK-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 1097; CHECK-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 1098; CHECK-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 1099; CHECK-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 1100; CHECK-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 1101; CHECK-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 1102; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 1103; CHECK-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 1104; CHECK-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 1105; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr 
inbounds float, float* [[X]], i64 30 1106; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* 1107; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 1108; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] 1109; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] 1110; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] 1111; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] 1112; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] 1113; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] 1114; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] 1115; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] 1116; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] 1117; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] 1118; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] 1119; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] 1120; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] 1121; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] 1122; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] 1123; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1124; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]] 1125; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1126; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <16 x float> [[BIN_RDX]], [[RDX_SHUF1]] 1127; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <16 x float> 
[[BIN_RDX2]], <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1128; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <16 x float> [[BIN_RDX2]], [[RDX_SHUF3]] 1129; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1130; CHECK-NEXT: [[BIN_RDX6:%.*]] = fadd fast <16 x float> [[BIN_RDX4]], [[RDX_SHUF5]] 1131; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0 1132; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1133; CHECK-NEXT: [[BIN_RDX8:%.*]] = fadd fast <8 x float> [[TMP5]], [[RDX_SHUF7]] 1134; CHECK-NEXT: [[RDX_SHUF9:%.*]] = shufflevector <8 x float> [[BIN_RDX8]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1135; CHECK-NEXT: [[BIN_RDX10:%.*]] = fadd fast <8 x float> [[BIN_RDX8]], [[RDX_SHUF9]] 1136; CHECK-NEXT: [[RDX_SHUF11:%.*]] = shufflevector <8 x float> [[BIN_RDX10]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1137; CHECK-NEXT: [[BIN_RDX12:%.*]] = fadd fast <8 x float> [[BIN_RDX10]], [[RDX_SHUF11]] 1138; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[BIN_RDX12]], i32 0 1139; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]] 1140; CHECK-NEXT: [[RDX_SHUF13:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 1141; CHECK-NEXT: [[BIN_RDX14:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF13]] 1142; CHECK-NEXT: [[RDX_SHUF15:%.*]] = 
shufflevector <4 x float> [[BIN_RDX14]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 1143; CHECK-NEXT: [[BIN_RDX16:%.*]] = fadd fast <4 x float> [[BIN_RDX14]], [[RDX_SHUF15]] 1144; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[BIN_RDX16]], i32 0 1145; CHECK-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] 1146; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]] 1147; CHECK-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] 1148; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] 1149; CHECK-NEXT: ret float [[TMP12]] 1150; 1151; THRESHOLD-LABEL: @loadadd31( 1152; THRESHOLD-NEXT: entry: 1153; THRESHOLD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 1154; THRESHOLD-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 1155; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 1156; THRESHOLD-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 1157; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]] 1158; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 1159; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 1160; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 1161; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 1162; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* 1163; THRESHOLD-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 1164; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] 1165; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] 1166; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] 1167; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] 1168; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, 
float* [[X]], i64 7 1169; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 1170; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 1171; THRESHOLD-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 10 1172; THRESHOLD-NEXT: [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 11 1173; THRESHOLD-NEXT: [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 12 1174; THRESHOLD-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 13 1175; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 1176; THRESHOLD-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* 1177; THRESHOLD-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 1178; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] 1179; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] 1180; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] 1181; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] 1182; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] 1183; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] 1184; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] 1185; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] 1186; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 1187; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 1188; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 1189; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 1190; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 19 1191; THRESHOLD-NEXT: [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 20 1192; 
THRESHOLD-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 21 1193; THRESHOLD-NEXT: [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 22 1194; THRESHOLD-NEXT: [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 23 1195; THRESHOLD-NEXT: [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 24 1196; THRESHOLD-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25 1197; THRESHOLD-NEXT: [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 26 1198; THRESHOLD-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 27 1199; THRESHOLD-NEXT: [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 28 1200; THRESHOLD-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29 1201; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 1202; THRESHOLD-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* 1203; THRESHOLD-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 1204; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] 1205; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] 1206; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] 1207; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] 1208; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] 1209; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] 1210; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] 1211; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] 1212; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] 1213; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] 1214; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] 1215; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] 1216; 
THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] 1217; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] 1218; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] 1219; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1220; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]] 1221; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1222; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <16 x float> [[BIN_RDX]], [[RDX_SHUF1]] 1223; THRESHOLD-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <16 x float> [[BIN_RDX2]], <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1224; THRESHOLD-NEXT: [[BIN_RDX4:%.*]] = fadd fast <16 x float> [[BIN_RDX2]], [[RDX_SHUF3]] 1225; THRESHOLD-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1226; THRESHOLD-NEXT: [[BIN_RDX6:%.*]] = fadd fast <16 x float> [[BIN_RDX4]], [[RDX_SHUF5]] 1227; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0 1228; THRESHOLD-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1229; THRESHOLD-NEXT: [[BIN_RDX8:%.*]] = fadd fast <8 x 
float> [[TMP5]], [[RDX_SHUF7]] 1230; THRESHOLD-NEXT: [[RDX_SHUF9:%.*]] = shufflevector <8 x float> [[BIN_RDX8]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1231; THRESHOLD-NEXT: [[BIN_RDX10:%.*]] = fadd fast <8 x float> [[BIN_RDX8]], [[RDX_SHUF9]] 1232; THRESHOLD-NEXT: [[RDX_SHUF11:%.*]] = shufflevector <8 x float> [[BIN_RDX10]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1233; THRESHOLD-NEXT: [[BIN_RDX12:%.*]] = fadd fast <8 x float> [[BIN_RDX10]], [[RDX_SHUF11]] 1234; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[BIN_RDX12]], i32 0 1235; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]] 1236; THRESHOLD-NEXT: [[RDX_SHUF13:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 1237; THRESHOLD-NEXT: [[BIN_RDX14:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF13]] 1238; THRESHOLD-NEXT: [[RDX_SHUF15:%.*]] = shufflevector <4 x float> [[BIN_RDX14]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 1239; THRESHOLD-NEXT: [[BIN_RDX16:%.*]] = fadd fast <4 x float> [[BIN_RDX14]], [[RDX_SHUF15]] 1240; THRESHOLD-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[BIN_RDX16]], i32 0 1241; THRESHOLD-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] 1242; THRESHOLD-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]] 1243; THRESHOLD-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] 1244; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] 1245; THRESHOLD-NEXT: ret float [[TMP12]] 1246; 1247 entry: 1248 %arrayidx = getelementptr inbounds float, float* %x, i64 1 1249 %0 = load float, float* %arrayidx, align 4 1250 %arrayidx.1 = getelementptr inbounds float, float* %x, i64 2 1251 %1 = load float, float* %arrayidx.1, align 4 1252 %add.1 = fadd fast float %1, %0 1253 %arrayidx.2 = getelementptr 
inbounds float, float* %x, i64 3 1254 %2 = load float, float* %arrayidx.2, align 4 1255 %add.2 = fadd fast float %2, %add.1 1256 %arrayidx.3 = getelementptr inbounds float, float* %x, i64 4 1257 %3 = load float, float* %arrayidx.3, align 4 1258 %add.3 = fadd fast float %3, %add.2 1259 %arrayidx.4 = getelementptr inbounds float, float* %x, i64 5 1260 %4 = load float, float* %arrayidx.4, align 4 1261 %add.4 = fadd fast float %4, %add.3 1262 %arrayidx.5 = getelementptr inbounds float, float* %x, i64 6 1263 %5 = load float, float* %arrayidx.5, align 4 1264 %add.5 = fadd fast float %5, %add.4 1265 %arrayidx.6 = getelementptr inbounds float, float* %x, i64 7 1266 %6 = load float, float* %arrayidx.6, align 4 1267 %add.6 = fadd fast float %6, %add.5 1268 %arrayidx.7 = getelementptr inbounds float, float* %x, i64 8 1269 %7 = load float, float* %arrayidx.7, align 4 1270 %add.7 = fadd fast float %7, %add.6 1271 %arrayidx.8 = getelementptr inbounds float, float* %x, i64 9 1272 %8 = load float, float* %arrayidx.8, align 4 1273 %add.8 = fadd fast float %8, %add.7 1274 %arrayidx.9 = getelementptr inbounds float, float* %x, i64 10 1275 %9 = load float, float* %arrayidx.9, align 4 1276 %add.9 = fadd fast float %9, %add.8 1277 %arrayidx.10 = getelementptr inbounds float, float* %x, i64 11 1278 %10 = load float, float* %arrayidx.10, align 4 1279 %add.10 = fadd fast float %10, %add.9 1280 %arrayidx.11 = getelementptr inbounds float, float* %x, i64 12 1281 %11 = load float, float* %arrayidx.11, align 4 1282 %add.11 = fadd fast float %11, %add.10 1283 %arrayidx.12 = getelementptr inbounds float, float* %x, i64 13 1284 %12 = load float, float* %arrayidx.12, align 4 1285 %add.12 = fadd fast float %12, %add.11 1286 %arrayidx.13 = getelementptr inbounds float, float* %x, i64 14 1287 %13 = load float, float* %arrayidx.13, align 4 1288 %add.13 = fadd fast float %13, %add.12 1289 %arrayidx.14 = getelementptr inbounds float, float* %x, i64 15 1290 %14 = load float, float* %arrayidx.14, align 4 
1291 %add.14 = fadd fast float %14, %add.13 1292 %arrayidx.15 = getelementptr inbounds float, float* %x, i64 16 1293 %15 = load float, float* %arrayidx.15, align 4 1294 %add.15 = fadd fast float %15, %add.14 1295 %arrayidx.16 = getelementptr inbounds float, float* %x, i64 17 1296 %16 = load float, float* %arrayidx.16, align 4 1297 %add.16 = fadd fast float %16, %add.15 1298 %arrayidx.17 = getelementptr inbounds float, float* %x, i64 18 1299 %17 = load float, float* %arrayidx.17, align 4 1300 %add.17 = fadd fast float %17, %add.16 1301 %arrayidx.18 = getelementptr inbounds float, float* %x, i64 19 1302 %18 = load float, float* %arrayidx.18, align 4 1303 %add.18 = fadd fast float %18, %add.17 1304 %arrayidx.19 = getelementptr inbounds float, float* %x, i64 20 1305 %19 = load float, float* %arrayidx.19, align 4 1306 %add.19 = fadd fast float %19, %add.18 1307 %arrayidx.20 = getelementptr inbounds float, float* %x, i64 21 1308 %20 = load float, float* %arrayidx.20, align 4 1309 %add.20 = fadd fast float %20, %add.19 1310 %arrayidx.21 = getelementptr inbounds float, float* %x, i64 22 1311 %21 = load float, float* %arrayidx.21, align 4 1312 %add.21 = fadd fast float %21, %add.20 1313 %arrayidx.22 = getelementptr inbounds float, float* %x, i64 23 1314 %22 = load float, float* %arrayidx.22, align 4 1315 %add.22 = fadd fast float %22, %add.21 1316 %arrayidx.23 = getelementptr inbounds float, float* %x, i64 24 1317 %23 = load float, float* %arrayidx.23, align 4 1318 %add.23 = fadd fast float %23, %add.22 1319 %arrayidx.24 = getelementptr inbounds float, float* %x, i64 25 1320 %24 = load float, float* %arrayidx.24, align 4 1321 %add.24 = fadd fast float %24, %add.23 1322 %arrayidx.25 = getelementptr inbounds float, float* %x, i64 26 1323 %25 = load float, float* %arrayidx.25, align 4 1324 %add.25 = fadd fast float %25, %add.24 1325 %arrayidx.26 = getelementptr inbounds float, float* %x, i64 27 1326 %26 = load float, float* %arrayidx.26, align 4 1327 %add.26 = fadd fast float 
%26, %add.25
  %arrayidx.27 = getelementptr inbounds float, float* %x, i64 28
  %27 = load float, float* %arrayidx.27, align 4
  %add.27 = fadd fast float %27, %add.26
  %arrayidx.28 = getelementptr inbounds float, float* %x, i64 29
  %28 = load float, float* %arrayidx.28, align 4
  %add.28 = fadd fast float %28, %add.27
  %arrayidx.29 = getelementptr inbounds float, float* %x, i64 30
  %29 = load float, float* %arrayidx.29, align 4
  %add.29 = fadd fast float %29, %add.28
  ret float %add.29
}

; Horizontal fadd reduction of x[0..7] with two "extra" scalar operands mixed
; into the scalar chain: %add (= %conv + 3.0) at the head and a second use of
; %conv mid-chain (%add5). Both check prefixes expect the 8-wide reduction to
; be vectorized, with the extra operands applied once each after the
; horizontal reduction (OP_EXTRA / OP_EXTRA5).
define float @extra_args(float* nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @extra_args(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; CHECK-NEXT:    [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; CHECK-NEXT:    [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
; CHECK-NEXT:    ret float [[OP_EXTRA5]]
;
; THRESHOLD-LABEL: @extra_args(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
; THRESHOLD-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; THRESHOLD-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; THRESHOLD-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; THRESHOLD-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; THRESHOLD-NEXT:    [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; THRESHOLD-NEXT:    [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
;
entry:
  %mul = mul nsw i32 %b, %a
  %conv = sitofp i32 %mul to float
  %0 = load float, float* %x, align 4
  %add = fadd fast float %conv, 3.000000e+00
  %add1 = fadd fast float %0, %add
  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
  %1 = load float, float* %arrayidx3, align 4
  %add4 = fadd fast float %1, %add1
  %add5 = fadd fast float %add4, %conv
  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
  %2 = load float, float* %arrayidx3.1, align 4
  %add4.1 = fadd fast float %2, %add5
  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
  %3 = load float, float* %arrayidx3.2, align 4
  %add4.2 = fadd fast float %3, %add4.1
  %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
  %4 = load float, float* %arrayidx3.3, align 4
  %add4.3 = fadd fast float %4, %add4.2
  %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
  %5 = load float, float* %arrayidx3.4, align 4
  %add4.4 = fadd fast float %5, %add4.3
  %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
  %6 = load float, float* %arrayidx3.5, align 4
  %add4.5 = fadd fast float %6, %add4.4
  %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
  %7 = load float, float* %arrayidx3.6, align 4
  %add4.6 = fadd fast float %7, %add4.5
  ret float %add4.6
}

; Same reduction as @extra_args, but the constant extra operand 5.0 is added
; into the scalar chain twice (%add41, %add4.11). The expected output applies
; that constant twice after the vector reduction (OP_EXTRA5 / OP_EXTRA6)
; rather than folding the repeated uses together.
define float @extra_args_same_several_times(float* nocapture readonly %x, i32 %a, i32 %b) {
; CHECK-LABEL: @extra_args_same_several_times(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; CHECK-NEXT:    [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; CHECK-NEXT:    [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
; CHECK-NEXT:    [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00
; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
; CHECK-NEXT:    [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00
; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
; CHECK-NEXT:    [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00
; CHECK-NEXT:    [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]]
; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
; CHECK-NEXT:    ret float [[OP_EXTRA7]]
;
; THRESHOLD-LABEL: @extra_args_same_several_times(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], 3.000000e+00
; THRESHOLD-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; THRESHOLD-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; THRESHOLD-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; THRESHOLD-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; THRESHOLD-NEXT:    [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; THRESHOLD-NEXT:    [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
; THRESHOLD-NEXT:    [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00
; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
; THRESHOLD-NEXT:    [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00
; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
; THRESHOLD-NEXT:    [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00
; THRESHOLD-NEXT:    [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]]
; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
; THRESHOLD-NEXT:    ret float [[OP_EXTRA7]]
;
entry:
  %mul = mul nsw i32 %b, %a
  %conv = sitofp i32 %mul to float
  %0 = load float, float* %x, align 4
  %add = fadd fast float %conv, 3.000000e+00
  %add1 = fadd fast float %0, %add
  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
  %1 = load float, float* %arrayidx3, align 4
  %add4 = fadd fast float %1, %add1
  %add41 = fadd fast float %add4, 5.000000e+00
  %add5 = fadd fast float %add41, %conv
  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
  %2 = load float, float* %arrayidx3.1, align 4
  %add4.1 = fadd fast float %2, %add5
  %add4.11 = fadd fast float %add4.1, 5.000000e+00
  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
  %3 = load float, float* %arrayidx3.2, align 4
  %add4.2 = fadd fast float %3, %add4.11
  %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
  %4 = load float, float* %arrayidx3.3, align 4
  %add4.3 = fadd fast float %4, %add4.2
  %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
  %5 = load float, float* %arrayidx3.4, align 4
  %add4.4 = fadd fast float %5, %add4.3
  %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
  %6 = load float, float* %arrayidx3.5, align 4
  %add4.5 = fadd fast float %6, %add4.4
  %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
  %7 = load float, float* %arrayidx3.6, align 4
  %add4.6 = fadd fast float %7, %add4.5
  ret float %add4.6
}

; Variant of @extra_args where the extra operand %add is itself the result of
; another fadd chain (%addc from a third argument %c) and the second use of
; %conv is added mid-chain (%add5 after four loads). The expected output
; still vectorizes the 8-wide reduction and applies %add and %conv once after
; it (OP_EXTRA / OP_EXTRA5).
define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @extra_args_no_replace(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT:    [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
; CHECK-NEXT:    [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]]
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; CHECK-NEXT:    [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; CHECK-NEXT:    [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; CHECK-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
; CHECK-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
; CHECK-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
; CHECK-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
; CHECK-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
; CHECK-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; CHECK-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
; CHECK-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
; CHECK-NEXT:    ret float [[OP_EXTRA5]]
;
; THRESHOLD-LABEL: @extra_args_no_replace(
; THRESHOLD-NEXT:  entry:
; THRESHOLD-NEXT:    [[MUL:%.*]] = mul nsw i32 [[B:%.*]], [[A:%.*]]
; THRESHOLD-NEXT:    [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; THRESHOLD-NEXT:    [[CONVC:%.*]] = sitofp i32 [[C:%.*]] to float
; THRESHOLD-NEXT:    [[ADDC:%.*]] = fadd fast float [[CONVC]], 3.000000e+00
; THRESHOLD-NEXT:    [[ADD:%.*]] = fadd fast float [[CONV]], [[ADDC]]
; THRESHOLD-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
; THRESHOLD-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
; THRESHOLD-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
; THRESHOLD-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
; THRESHOLD-NEXT:    [[ARRAYIDX3_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
; THRESHOLD-NEXT:    [[ARRAYIDX3_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
; THRESHOLD-NEXT:    [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
; THRESHOLD-NEXT:    [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
; THRESHOLD-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; THRESHOLD-NEXT:    [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
; THRESHOLD-NEXT:    [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
; THRESHOLD-NEXT:    [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
; THRESHOLD-NEXT:    [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
; THRESHOLD-NEXT:    [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
; THRESHOLD-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
; THRESHOLD-NEXT:    [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
; THRESHOLD-NEXT:    [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <8 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
; THRESHOLD-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
; THRESHOLD-NEXT:    [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
; THRESHOLD-NEXT:    [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
; THRESHOLD-NEXT:    ret float [[OP_EXTRA5]]
;
entry:
  %mul = mul nsw i32 %b, %a
  %conv = sitofp i32 %mul to float
  %0 = load float, float* %x, align 4
  %convc = sitofp i32 %c to float
  %addc = fadd fast float %convc, 3.000000e+00
  %add = fadd fast float %conv, %addc
  %add1 = fadd fast float %0, %add
  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
  %1 = load float, float* %arrayidx3, align 4
  %add4 = fadd fast float %1, %add1
  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
  %2 = load float, float* %arrayidx3.1, align 4
  %add4.1 = fadd fast float %2, %add4
  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
  %3 = load float, float* %arrayidx3.2, align 4
  %add4.2 = fadd fast float %3, %add4.1
  %arrayidx3.3 = getelementptr inbounds float, float* %x, i64 4
  %4 = load float, float* %arrayidx3.3, align 4
  %add4.3 = fadd fast float %4, %add4.2
  %add5 = fadd fast float %add4.3, %conv
  %arrayidx3.4 = getelementptr inbounds float, float* %x, i64 5
  %5 = load float, float* %arrayidx3.4, align 4
  %add4.4 = fadd fast float %5, %add5
  %arrayidx3.5 = getelementptr inbounds float, float* %x, i64 6
  %6 = load float, float* %arrayidx3.5, align 4
  %add4.5 = fadd fast float %6, %add4.4
  %arrayidx3.6 = getelementptr inbounds float, float* %x, i64 7
  %7 = load float, float* %arrayidx3.6, align 4
  %add4.6 = fadd fast float %7, %add4.5
  ret float %add4.6
}

; Integer reduction: four identical sext(icmp eq (xor %arg, %bar), 0) values
; summed with %arg, plus an extra use of the last xor (%x4) in the final add.
; The expected output builds <4 x i32> splats of %arg and %bar, vectorizes
; the xor/icmp/sext, and re-extracts element 3 (TMP9) for the extra %x4 use
; applied after the add reduction (OP_EXTRA / OP_EXTRA3).
define i32 @wobble(i32 %arg, i32 %bar) {
; CHECK-LABEL: @wobble(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[ARG:%.*]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[BAR:%.*]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3
; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i32> [[TMP3]], [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
; CHECK-NEXT:    [[R1:%.*]] = add nuw i32 [[ARG]], undef
; CHECK-NEXT:    [[R2:%.*]] = add nsw i32 [[R1]], undef
; CHECK-NEXT:    [[R3:%.*]] = add nsw i32 [[R2]], undef
; CHECK-NEXT:    [[R4:%.*]] = add nsw i32 [[R3]], undef
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
; CHECK-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
; CHECK-NEXT:    ret i32 [[OP_EXTRA3]]
;
; THRESHOLD-LABEL: @wobble(
; THRESHOLD-NEXT:  bb:
; THRESHOLD-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[ARG:%.*]], i32 0
; THRESHOLD-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1
; THRESHOLD-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2
; THRESHOLD-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3
; THRESHOLD-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[BAR:%.*]], i32 0
; THRESHOLD-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1
; THRESHOLD-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2
; THRESHOLD-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3
; THRESHOLD-NEXT:    [[TMP8:%.*]] = xor <4 x i32> [[TMP3]], [[TMP7]]
; THRESHOLD-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
; THRESHOLD-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer
; THRESHOLD-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
; THRESHOLD-NEXT:    [[R1:%.*]] = add nuw i32 [[ARG]], undef
; THRESHOLD-NEXT:    [[R2:%.*]] = add nsw i32 [[R1]], undef
; THRESHOLD-NEXT:    [[R3:%.*]] = add nsw i32 [[R2]], undef
; THRESHOLD-NEXT:    [[R4:%.*]] = add nsw i32 [[R3]], undef
; THRESHOLD-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; THRESHOLD-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]]
; THRESHOLD-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; THRESHOLD-NEXT:    [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; THRESHOLD-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
; THRESHOLD-NEXT:    [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
; THRESHOLD-NEXT:    [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
; THRESHOLD-NEXT:    [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
; THRESHOLD-NEXT:    ret i32 [[OP_EXTRA3]]
;
bb:
  %x1 = xor i32 %arg, %bar
  %i1 = icmp eq i32 %x1, 0
  %s1 = sext i1 %i1 to i32
  %x2 = xor i32 %arg, %bar
  %i2 = icmp eq i32 %x2, 0
  %s2 = sext i1 %i2 to i32
  %x3 = xor i32 %arg, %bar
  %i3 = icmp eq i32 %x3, 0
  %s3 = sext i1 %i3 to i32
  %x4 = xor i32 %arg, %bar
  %i4 = icmp eq i32 %x4, 0
  %s4 = sext i1 %i4 to i32
  %r1 = add nuw i32 %arg, %s1
  %r2 = add nsw i32 %r1, %s2
  %r3 = add nsw i32 %r2, %s3
  %r4 = add nsw i32 %r3, %s4
  %r5 = add nsw i32 %r4, %x4
  ret i32 %r5
}
