; Verifies the X86 cost model's estimates for cast instructions (zext, sext,
; trunc, sitofp, uitofp, fpext, fptrunc). The test is run once per CPU below;
; the plain CHECK prefix applies to every run, while the -AVX512, -AVX2 and
; -AVX suffixed prefixes apply only to the knl, core-avx2 and corei7-avx runs
; respectively.
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; i1 <-> i32 casts on 4-wide vectors, 8-wide vectors and scalars; the expected
; costs here are shared by all three CPUs.
define i32 @add(i32 %arg) {
; CHECK-LABEL: for function 'add'
  ; -- Same size registers --
  ;CHECK: cost of 1 {{.*}} zext
  %A = zext <4 x i1> undef to <4 x i32>
  ;CHECK: cost of 2 {{.*}} sext
  %B = sext <4 x i1> undef to <4 x i32>
  ;CHECK: cost of 0 {{.*}} trunc
  %C = trunc <4 x i32> undef to <4 x i1>

  ; -- Different size registers --
  ;CHECK-NOT: cost of 1 {{.*}} zext
  %D = zext <8 x i1> undef to <8 x i32>
  ;CHECK-NOT: cost of 2 {{.*}} sext
  %E = sext <8 x i1> undef to <8 x i32>
  ;CHECK-NOT: cost of 2 {{.*}} trunc
  %F = trunc <8 x i32> undef to <8 x i1>

  ; -- scalars --

  ;CHECK: cost of 1 {{.*}} zext
  %G = zext i1 undef to i32
  ;CHECK: cost of 0 {{.*}} trunc
  %H = trunc i32 undef to i1

  ;CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}

; Integer extension and truncation between vector types of various element
; widths, with per-CPU expected costs.
define i32 @zext_sext(<8 x i1> %in) {
; CHECK-AVX512-LABEL: for function 'zext_sext'
; CHECK-AVX2-LABEL: for function 'zext_sext'
; CHECK-AVX-LABEL: for function 'zext_sext'
  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %Z = zext <8 x i1> %in to <8 x i32>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 7 {{.*}} sext
  %S = sext <8 x i1> %in to <8 x i32>

  ;CHECK-AVX2: cost of 1 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %A1 = zext <16 x i8> undef to <16 x i16>
  ;CHECK-AVX2: cost of 1 {{.*}} sext
  ;CHECK-AVX: cost of 4 {{.*}} sext
  %A2 = sext <16 x i8> undef to <16 x i16>
  ;CHECK-AVX2: cost of 1 {{.*}} sext
  ;CHECK-AVX: cost of 4 {{.*}} sext
  %A = sext <8 x i16> undef to <8 x i32>
  ;CHECK-AVX2: cost of 1 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %B = zext <8 x i16> undef to <8 x i32>
  ;CHECK-AVX2: cost of 1 {{.*}} sext
  ;CHECK-AVX: cost of 4 {{.*}} sext
  %C = sext <4 x i32> undef to <4 x i64>

  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 7 {{.*}} sext
  %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 3 {{.*}} zext
  %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 6 {{.*}} sext
  %C.v4i16.s = sext <4 x i16> undef to <4 x i64>

  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 6 {{.*}} sext
  %C.v4i8.s = sext <4 x i8> undef to <4 x i64>

  ;CHECK-AVX2: cost of 1 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %D = zext <4 x i32> undef to <4 x i64>

  ;CHECK-AVX512: cost of 3 {{.*}} %D1 = zext
  %D1 = zext <16 x i32> undef to <16 x i64>

  ;CHECK-AVX512: cost of 3 {{.*}} %D2 = sext
  %D2 = sext <16 x i32> undef to <16 x i64>

  ;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
  %D3 = zext <16 x i16> undef to <16 x i32>
  ;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext
  %D4 = zext <16 x i8> undef to <16 x i32>
  ;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext
  %D5 = zext <16 x i1> undef to <16 x i32>

  ;CHECK-AVX2: cost of 2 {{.*}} trunc
  ;CHECK-AVX: cost of 4 {{.*}} trunc
  %E = trunc <4 x i64> undef to <4 x i32>
  ;CHECK-AVX2: cost of 2 {{.*}} trunc
  ;CHECK-AVX: cost of 5 {{.*}} trunc
  %F = trunc <8 x i32> undef to <8 x i16>
  ;CHECK-AVX2: cost of 4 {{.*}} trunc
  ;CHECK-AVX: cost of 4 {{.*}} trunc
  %F1 = trunc <16 x i16> undef to <16 x i8>
  ;CHECK-AVX2: cost of 2 {{.*}} trunc
  ;CHECK-AVX: cost of 4 {{.*}} trunc
  %F2 = trunc <8 x i32> undef to <8 x i8>
  ;CHECK-AVX2: cost of 2 {{.*}} trunc
  ;CHECK-AVX: cost of 4 {{.*}} trunc
  %F3 = trunc <4 x i64> undef to <4 x i8>

  ; The AVX-512 pattern names %G explicitly so it cannot be satisfied by the
  ; %G1 instruction that follows.
  ;CHECK-AVX2: cost of 4 {{.*}} trunc
  ;CHECK-AVX: cost of 9 {{.*}} trunc
  ;CHECK-AVX512: cost of 1 {{.*}} %G = trunc
  %G = trunc <8 x i64> undef to <8 x i32>

  ;CHECK-AVX512: cost of 4 {{.*}} %G1 = trunc
  %G1 = trunc <16 x i64> undef to <16 x i32>

  ret i32 undef
}

; Extension of an 8-element i1 mask argument to <8 x i32>.
define i32 @masks8(<8 x i1> %in) {
; CHECK-AVX2-LABEL: for function 'masks8'
; CHECK-AVX-LABEL: for function 'masks8'

  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %Z = zext <8 x i1> %in to <8 x i32>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 7 {{.*}} sext
  %S = sext <8 x i1> %in to <8 x i32>
  ret i32 undef
}

; Extension of a 4-element i1 mask argument to <4 x i64>.
define i32 @masks4(<4 x i1> %in) {
; CHECK-AVX2-LABEL: for function 'masks4'
; CHECK-AVX-LABEL: for function 'masks4'

  ;CHECK-AVX2: cost of 3 {{.*}} zext
  ;CHECK-AVX: cost of 4 {{.*}} zext
  %Z = zext <4 x i1> %in to <4 x i64>
  ;CHECK-AVX2: cost of 3 {{.*}} sext
  ;CHECK-AVX: cost of 6 {{.*}} sext
  %S = sext <4 x i1> %in to <4 x i64>
  ret i32 undef
}

; Signed int-to-fp conversion of 4-element vectors to float and double.
define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; CHECK-LABEL: for function 'sitofp4'
  ; CHECK: cost of 3 {{.*}} sitofp
  %A1 = sitofp <4 x i1> %a to <4 x float>
  ; CHECK: cost of 3 {{.*}} sitofp
  %A2 = sitofp <4 x i1> %a to <4 x double>

  ; CHECK: cost of 3 {{.*}} sitofp
  %B1 = sitofp <4 x i8> %b to <4 x float>
  ; CHECK: cost of 3 {{.*}} sitofp
  %B2 = sitofp <4 x i8> %b to <4 x double>

  ; CHECK: cost of 3 {{.*}} sitofp
  %C1 = sitofp <4 x i16> %c to <4 x float>
  ; CHECK: cost of 3 {{.*}} sitofp
  %C2 = sitofp <4 x i16> %c to <4 x double>

  ; CHECK: cost of 1 {{.*}} sitofp
  %D1 = sitofp <4 x i32> %d to <4 x float>
  ; CHECK: cost of 1 {{.*}} sitofp
  %D2 = sitofp <4 x i32> %d to <4 x double>
  ret void
}

; Signed int-to-fp conversion of 8-element vectors to <8 x float>.
define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
; CHECK-LABEL: for function 'sitofp8'
  ; CHECK: cost of 8 {{.*}} sitofp
  %A1 = sitofp <8 x i1> %a to <8 x float>

  ; CHECK: cost of 8 {{.*}} sitofp
  %B1 = sitofp <8 x i8> %b to <8 x float>

  ; CHECK: cost of 5 {{.*}} sitofp
  %C1 = sitofp <8 x i16> %c to <8 x float>

  ; CHECK: cost of 1 {{.*}} sitofp
  %D1 = sitofp <8 x i32> %d to <8 x float>
  ret void
}

; Unsigned int-to-fp conversion of 4-element vectors to float and double.
define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; CHECK-LABEL: for function 'uitofp4'
  ; CHECK: cost of 7 {{.*}} uitofp
  %A1 = uitofp <4 x i1> %a to <4 x float>
  ; CHECK: cost of 7 {{.*}} uitofp
  %A2 = uitofp <4 x i1> %a to <4 x double>

  ; CHECK: cost of 2 {{.*}} uitofp
  %B1 = uitofp <4 x i8> %b to <4 x float>
  ; CHECK: cost of 2 {{.*}} uitofp
  %B2 = uitofp <4 x i8> %b to <4 x double>

  ; CHECK: cost of 2 {{.*}} uitofp
  %C1 = uitofp <4 x i16> %c to <4 x float>
  ; CHECK: cost of 2 {{.*}} uitofp
  %C2 = uitofp <4 x i16> %c to <4 x double>

  ; CHECK: cost of 6 {{.*}} uitofp
  %D1 = uitofp <4 x i32> %d to <4 x float>
  ; CHECK: cost of 6 {{.*}} uitofp
  %D2 = uitofp <4 x i32> %d to <4 x double>
  ret void
}

; Unsigned int-to-fp conversion of 8-element vectors to <8 x float>; only the
; i32 source case has per-CPU expectations.
define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
; CHECK-LABEL: for function 'uitofp8'
  ; CHECK: cost of 6 {{.*}} uitofp
  %A1 = uitofp <8 x i1> %a to <8 x float>

  ; CHECK: cost of 5 {{.*}} uitofp
  %B1 = uitofp <8 x i8> %b to <8 x float>

  ; CHECK: cost of 5 {{.*}} uitofp
  %C1 = uitofp <8 x i16> %c to <8 x float>

  ; CHECK-AVX2: cost of 8 {{.*}} uitofp
  ; CHECK-AVX512: cost of 8 {{.*}} uitofp
  ; CHECK-AVX: cost of 9 {{.*}} uitofp
  %D1 = uitofp <8 x i32> %d to <8 x float>
  ret void
}

; Floating-point extension and truncation between float and double vectors.
define void @fp_conv(<8 x float> %a, <16 x float>%b) {
;CHECK-LABEL: for function 'fp_conv'
  ; CHECK-AVX512: cost of 1 {{.*}} fpext
  %A1 = fpext <8 x float> %a to <8 x double>

  ; CHECK-AVX512: cost of 3 {{.*}} fpext
  %A2 = fpext <16 x float> %b to <16 x double>

  ; CHECK-AVX2: cost of 3 {{.*}} %A3 = fpext
  ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
  %A3 = fpext <8 x float> %a to <8 x double>

  ; CHECK-AVX2: cost of 3 {{.*}} %A4 = fptrunc
  ; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
  %A4 = fptrunc <8 x double> undef to <8 x float>

  ; CHECK-AVX512: cost of 3 {{.*}} %A5 = fptrunc
  %A5 = fptrunc <16 x double> undef to <16 x float>
  ret void
}