1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: opt -mtriple=powerpc-unknown-linux-gnu < %s -instcombine | \ 3; RUN: llc -mtriple=ppc32-- -mcpu=g5 | not grep vperm 4; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | FileCheck %s 5 6define void @VSLDOI_xy(<8 x i16>* %A, <8 x i16>* %B) { 7; CHECK-LABEL: VSLDOI_xy: 8; CHECK: # %bb.0: # %entry 9; CHECK-NEXT: lvx 2, 0, 3 10; CHECK-NEXT: lvx 3, 0, 4 11; CHECK-NEXT: vsldoi 2, 2, 3, 5 12; CHECK-NEXT: stvx 2, 0, 3 13; CHECK-NEXT: blr 14entry: 15 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=1] 16 %tmp2 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=1] 17 %tmp.upgrd.1 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=11] 18 %tmp2.upgrd.2 = bitcast <8 x i16> %tmp2 to <16 x i8> ; <<16 x i8>> [#uses=5] 19 %tmp.upgrd.3 = extractelement <16 x i8> %tmp.upgrd.1, i32 5 ; <i8> [#uses=1] 20 %tmp3 = extractelement <16 x i8> %tmp.upgrd.1, i32 6 ; <i8> [#uses=1] 21 %tmp4 = extractelement <16 x i8> %tmp.upgrd.1, i32 7 ; <i8> [#uses=1] 22 %tmp5 = extractelement <16 x i8> %tmp.upgrd.1, i32 8 ; <i8> [#uses=1] 23 %tmp6 = extractelement <16 x i8> %tmp.upgrd.1, i32 9 ; <i8> [#uses=1] 24 %tmp7 = extractelement <16 x i8> %tmp.upgrd.1, i32 10 ; <i8> [#uses=1] 25 %tmp8 = extractelement <16 x i8> %tmp.upgrd.1, i32 11 ; <i8> [#uses=1] 26 %tmp9 = extractelement <16 x i8> %tmp.upgrd.1, i32 12 ; <i8> [#uses=1] 27 %tmp10 = extractelement <16 x i8> %tmp.upgrd.1, i32 13 ; <i8> [#uses=1] 28 %tmp11 = extractelement <16 x i8> %tmp.upgrd.1, i32 14 ; <i8> [#uses=1] 29 %tmp12 = extractelement <16 x i8> %tmp.upgrd.1, i32 15 ; <i8> [#uses=1] 30 %tmp13 = extractelement <16 x i8> %tmp2.upgrd.2, i32 0 ; <i8> [#uses=1] 31 %tmp14 = extractelement <16 x i8> %tmp2.upgrd.2, i32 1 ; <i8> [#uses=1] 32 %tmp15 = extractelement <16 x i8> %tmp2.upgrd.2, i32 2 ; <i8> [#uses=1] 33 %tmp16 = extractelement <16 x i8> %tmp2.upgrd.2, i32 3 ; <i8> [#uses=1] 34 %tmp17 = extractelement <16 x i8> 
%tmp2.upgrd.2, i32 4 ; <i8> [#uses=1] 35 %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.3, i32 0 ; <<16 x i8>> [#uses=1] 36 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1] 37 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1] 38 %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1] 39 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1] 40 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1] 41 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1] 42 %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1] 43 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1] 44 %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1] 45 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1] 46 %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1] 47 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1] 48 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1] 49 %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1] 50 %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1] 51 %tmp33.upgrd.4 = bitcast <16 x i8> %tmp33 to <8 x i16> ; <<8 x i16>> [#uses=1] 52 store <8 x i16> %tmp33.upgrd.4, <8 x i16>* %A 53 ret void 54} 55 56define void @VSLDOI_xx(<8 x i16>* %A, <8 x i16>* %B) { 57; CHECK-LABEL: VSLDOI_xx: 58; CHECK: # %bb.0: 59; CHECK-NEXT: lvx 2, 0, 3 60; CHECK-NEXT: vsldoi 2, 2, 2, 5 61; CHECK-NEXT: stvx 2, 0, 3 62; CHECK-NEXT: blr 63 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=1] 64 %tmp2 = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=1] 65 %tmp.upgrd.5 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=11] 66 %tmp2.upgrd.6 = 
bitcast <8 x i16> %tmp2 to <16 x i8> ; <<16 x i8>> [#uses=5] 67 %tmp.upgrd.7 = extractelement <16 x i8> %tmp.upgrd.5, i32 5 ; <i8> [#uses=1] 68 %tmp3 = extractelement <16 x i8> %tmp.upgrd.5, i32 6 ; <i8> [#uses=1] 69 %tmp4 = extractelement <16 x i8> %tmp.upgrd.5, i32 7 ; <i8> [#uses=1] 70 %tmp5 = extractelement <16 x i8> %tmp.upgrd.5, i32 8 ; <i8> [#uses=1] 71 %tmp6 = extractelement <16 x i8> %tmp.upgrd.5, i32 9 ; <i8> [#uses=1] 72 %tmp7 = extractelement <16 x i8> %tmp.upgrd.5, i32 10 ; <i8> [#uses=1] 73 %tmp8 = extractelement <16 x i8> %tmp.upgrd.5, i32 11 ; <i8> [#uses=1] 74 %tmp9 = extractelement <16 x i8> %tmp.upgrd.5, i32 12 ; <i8> [#uses=1] 75 %tmp10 = extractelement <16 x i8> %tmp.upgrd.5, i32 13 ; <i8> [#uses=1] 76 %tmp11 = extractelement <16 x i8> %tmp.upgrd.5, i32 14 ; <i8> [#uses=1] 77 %tmp12 = extractelement <16 x i8> %tmp.upgrd.5, i32 15 ; <i8> [#uses=1] 78 %tmp13 = extractelement <16 x i8> %tmp2.upgrd.6, i32 0 ; <i8> [#uses=1] 79 %tmp14 = extractelement <16 x i8> %tmp2.upgrd.6, i32 1 ; <i8> [#uses=1] 80 %tmp15 = extractelement <16 x i8> %tmp2.upgrd.6, i32 2 ; <i8> [#uses=1] 81 %tmp16 = extractelement <16 x i8> %tmp2.upgrd.6, i32 3 ; <i8> [#uses=1] 82 %tmp17 = extractelement <16 x i8> %tmp2.upgrd.6, i32 4 ; <i8> [#uses=1] 83 %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.7, i32 0 ; <<16 x i8>> [#uses=1] 84 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1] 85 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1] 86 %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1] 87 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1] 88 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1] 89 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1] 90 %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1] 91 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; 
<<16 x i8>> [#uses=1] 92 %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1] 93 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1] 94 %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1] 95 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1] 96 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1] 97 %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1] 98 %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1] 99 %tmp33.upgrd.8 = bitcast <16 x i8> %tmp33 to <8 x i16> ; <<8 x i16>> [#uses=1] 100 store <8 x i16> %tmp33.upgrd.8, <8 x i16>* %A 101 ret void 102} 103 104define void @VPERM_promote(<8 x i16>* %A, <8 x i16>* %B) { 105; CHECK-LABEL: VPERM_promote: 106; CHECK: # %bb.0: # %entry 107; CHECK-NEXT: lvx 2, 0, 3 108; CHECK-NEXT: vspltisb 4, 14 109; CHECK-NEXT: lvx 3, 0, 4 110; CHECK-NEXT: vperm 2, 2, 3, 4 111; CHECK-NEXT: stvx 2, 0, 3 112; CHECK-NEXT: blr 113entry: 114 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=1] 115 %tmp.upgrd.9 = bitcast <8 x i16> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1] 116 %tmp2 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=1] 117 %tmp2.upgrd.10 = bitcast <8 x i16> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1] 118 %tmp3 = call <4 x i32> @llvm.ppc.altivec.vperm( <4 x i32> %tmp.upgrd.9, <4 x i32> %tmp2.upgrd.10, <16 x i8> < i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14 > ) ; <<4 x i32>> [#uses=1] 119 %tmp3.upgrd.11 = bitcast <4 x i32> %tmp3 to <8 x i16> ; <<8 x i16>> [#uses=1] 120 store <8 x i16> %tmp3.upgrd.11, <8 x i16>* %A 121 ret void 122} 123 124declare <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32>, <4 x i32>, <16 x i8>) 125 126define void @tb_l(<16 x i8>* %A, <16 x i8>* %B) { 127; CHECK-LABEL: tb_l: 128; CHECK: # %bb.0: # %entry 129; CHECK-NEXT: lvx 2, 
0, 3 130; CHECK-NEXT: lvx 3, 0, 4 131; CHECK-NEXT: vmrglb 2, 2, 3 132; CHECK-NEXT: stvx 2, 0, 3 133; CHECK-NEXT: blr 134entry: 135 %tmp = load <16 x i8>, <16 x i8>* %A ; <<16 x i8>> [#uses=8] 136 %tmp2 = load <16 x i8>, <16 x i8>* %B ; <<16 x i8>> [#uses=8] 137 %tmp.upgrd.12 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1] 138 %tmp3 = extractelement <16 x i8> %tmp2, i32 8 ; <i8> [#uses=1] 139 %tmp4 = extractelement <16 x i8> %tmp, i32 9 ; <i8> [#uses=1] 140 %tmp5 = extractelement <16 x i8> %tmp2, i32 9 ; <i8> [#uses=1] 141 %tmp6 = extractelement <16 x i8> %tmp, i32 10 ; <i8> [#uses=1] 142 %tmp7 = extractelement <16 x i8> %tmp2, i32 10 ; <i8> [#uses=1] 143 %tmp8 = extractelement <16 x i8> %tmp, i32 11 ; <i8> [#uses=1] 144 %tmp9 = extractelement <16 x i8> %tmp2, i32 11 ; <i8> [#uses=1] 145 %tmp10 = extractelement <16 x i8> %tmp, i32 12 ; <i8> [#uses=1] 146 %tmp11 = extractelement <16 x i8> %tmp2, i32 12 ; <i8> [#uses=1] 147 %tmp12 = extractelement <16 x i8> %tmp, i32 13 ; <i8> [#uses=1] 148 %tmp13 = extractelement <16 x i8> %tmp2, i32 13 ; <i8> [#uses=1] 149 %tmp14 = extractelement <16 x i8> %tmp, i32 14 ; <i8> [#uses=1] 150 %tmp15 = extractelement <16 x i8> %tmp2, i32 14 ; <i8> [#uses=1] 151 %tmp16 = extractelement <16 x i8> %tmp, i32 15 ; <i8> [#uses=1] 152 %tmp17 = extractelement <16 x i8> %tmp2, i32 15 ; <i8> [#uses=1] 153 %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.12, i32 0 ; <<16 x i8>> [#uses=1] 154 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1] 155 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1] 156 %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1] 157 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1] 158 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1] 159 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1] 160 %tmp25 = insertelement <16 x i8> %tmp24, i8 
%tmp9, i32 7 ; <<16 x i8>> [#uses=1] 161 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1] 162 %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1] 163 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1] 164 %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1] 165 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1] 166 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1] 167 %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1] 168 %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1] 169 store <16 x i8> %tmp33, <16 x i8>* %A 170 ret void 171} 172 173define void @th_l(<8 x i16>* %A, <8 x i16>* %B) { 174; CHECK-LABEL: th_l: 175; CHECK: # %bb.0: # %entry 176; CHECK-NEXT: lvx 2, 0, 3 177; CHECK-NEXT: lvx 3, 0, 4 178; CHECK-NEXT: vmrglh 2, 2, 3 179; CHECK-NEXT: stvx 2, 0, 3 180; CHECK-NEXT: blr 181entry: 182 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=4] 183 %tmp2 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=4] 184 %tmp.upgrd.13 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1] 185 %tmp3 = extractelement <8 x i16> %tmp2, i32 4 ; <i16> [#uses=1] 186 %tmp4 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1] 187 %tmp5 = extractelement <8 x i16> %tmp2, i32 5 ; <i16> [#uses=1] 188 %tmp6 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1] 189 %tmp7 = extractelement <8 x i16> %tmp2, i32 6 ; <i16> [#uses=1] 190 %tmp8 = extractelement <8 x i16> %tmp, i32 7 ; <i16> [#uses=1] 191 %tmp9 = extractelement <8 x i16> %tmp2, i32 7 ; <i16> [#uses=1] 192 %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.13, i32 0 ; <<8 x i16>> [#uses=1] 193 %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1] 194 %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1] 
195 %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1] 196 %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1] 197 %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1] 198 %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1] 199 %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1] 200 store <8 x i16> %tmp17, <8 x i16>* %A 201 ret void 202} 203 204define void @tw_l(<4 x i32>* %A, <4 x i32>* %B) { 205; CHECK-LABEL: tw_l: 206; CHECK: # %bb.0: # %entry 207; CHECK-NEXT: lvx 2, 0, 3 208; CHECK-NEXT: lvx 3, 0, 4 209; CHECK-NEXT: vmrglw 2, 2, 3 210; CHECK-NEXT: stvx 2, 0, 3 211; CHECK-NEXT: blr 212entry: 213 %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=2] 214 %tmp2 = load <4 x i32>, <4 x i32>* %B ; <<4 x i32>> [#uses=2] 215 %tmp.upgrd.14 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1] 216 %tmp3 = extractelement <4 x i32> %tmp2, i32 2 ; <i32> [#uses=1] 217 %tmp4 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1] 218 %tmp5 = extractelement <4 x i32> %tmp2, i32 3 ; <i32> [#uses=1] 219 %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.14, i32 0 ; <<4 x i32>> [#uses=1] 220 %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] 221 %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1] 222 %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1] 223 store <4 x i32> %tmp9, <4 x i32>* %A 224 ret void 225} 226 227define void @tb_h(<16 x i8>* %A, <16 x i8>* %B) { 228; CHECK-LABEL: tb_h: 229; CHECK: # %bb.0: # %entry 230; CHECK-NEXT: lvx 2, 0, 3 231; CHECK-NEXT: lvx 3, 0, 4 232; CHECK-NEXT: vmrghb 2, 2, 3 233; CHECK-NEXT: stvx 2, 0, 3 234; CHECK-NEXT: blr 235entry: 236 %tmp = load <16 x i8>, <16 x i8>* %A ; <<16 x i8>> [#uses=8] 237 %tmp2 = load <16 x i8>, <16 x i8>* %B ; <<16 x i8>> [#uses=8] 238 %tmp.upgrd.15 = extractelement <16 x 
i8> %tmp, i32 0 ; <i8> [#uses=1] 239 %tmp3 = extractelement <16 x i8> %tmp2, i32 0 ; <i8> [#uses=1] 240 %tmp4 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1] 241 %tmp5 = extractelement <16 x i8> %tmp2, i32 1 ; <i8> [#uses=1] 242 %tmp6 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1] 243 %tmp7 = extractelement <16 x i8> %tmp2, i32 2 ; <i8> [#uses=1] 244 %tmp8 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1] 245 %tmp9 = extractelement <16 x i8> %tmp2, i32 3 ; <i8> [#uses=1] 246 %tmp10 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1] 247 %tmp11 = extractelement <16 x i8> %tmp2, i32 4 ; <i8> [#uses=1] 248 %tmp12 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1] 249 %tmp13 = extractelement <16 x i8> %tmp2, i32 5 ; <i8> [#uses=1] 250 %tmp14 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1] 251 %tmp15 = extractelement <16 x i8> %tmp2, i32 6 ; <i8> [#uses=1] 252 %tmp16 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1] 253 %tmp17 = extractelement <16 x i8> %tmp2, i32 7 ; <i8> [#uses=1] 254 %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.15, i32 0 ; <<16 x i8>> [#uses=1] 255 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1] 256 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1] 257 %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1] 258 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1] 259 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1] 260 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1] 261 %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1] 262 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1] 263 %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1] 264 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1] 265 %tmp29 = 
insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1] 266 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1] 267 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1] 268 %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1] 269 %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1] 270 store <16 x i8> %tmp33, <16 x i8>* %A 271 ret void 272} 273 274define void @th_h(<8 x i16>* %A, <8 x i16>* %B) { 275; CHECK-LABEL: th_h: 276; CHECK: # %bb.0: # %entry 277; CHECK-NEXT: lvx 2, 0, 3 278; CHECK-NEXT: lvx 3, 0, 4 279; CHECK-NEXT: vmrghh 2, 2, 3 280; CHECK-NEXT: stvx 2, 0, 3 281; CHECK-NEXT: blr 282entry: 283 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=4] 284 %tmp2 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=4] 285 %tmp.upgrd.16 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1] 286 %tmp3 = extractelement <8 x i16> %tmp2, i32 0 ; <i16> [#uses=1] 287 %tmp4 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1] 288 %tmp5 = extractelement <8 x i16> %tmp2, i32 1 ; <i16> [#uses=1] 289 %tmp6 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1] 290 %tmp7 = extractelement <8 x i16> %tmp2, i32 2 ; <i16> [#uses=1] 291 %tmp8 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1] 292 %tmp9 = extractelement <8 x i16> %tmp2, i32 3 ; <i16> [#uses=1] 293 %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.16, i32 0 ; <<8 x i16>> [#uses=1] 294 %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1] 295 %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1] 296 %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1] 297 %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1] 298 %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1] 299 %tmp16 = insertelement <8 x i16> %tmp15, i16 
%tmp8, i32 6 ; <<8 x i16>> [#uses=1] 300 %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1] 301 store <8 x i16> %tmp17, <8 x i16>* %A 302 ret void 303} 304 305define void @tw_h(<4 x i32>* %A, <4 x i32>* %B) { 306; CHECK-LABEL: tw_h: 307; CHECK: # %bb.0: # %entry 308; CHECK-NEXT: lvx 2, 0, 3 309; CHECK-NEXT: lvx 3, 0, 4 310; CHECK-NEXT: vmrghw 2, 3, 2 311; CHECK-NEXT: stvx 2, 0, 3 312; CHECK-NEXT: blr 313entry: 314 %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=2] 315 %tmp2 = load <4 x i32>, <4 x i32>* %B ; <<4 x i32>> [#uses=2] 316 %tmp.upgrd.17 = extractelement <4 x i32> %tmp2, i32 0 ; <i32> [#uses=1] 317 %tmp3 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1] 318 %tmp4 = extractelement <4 x i32> %tmp2, i32 1 ; <i32> [#uses=1] 319 %tmp5 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1] 320 %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.17, i32 0 ; <<4 x i32>> [#uses=1] 321 %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] 322 %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1] 323 %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1] 324 store <4 x i32> %tmp9, <4 x i32>* %A 325 ret void 326} 327 328define void @tw_h_flop(<4 x i32>* %A, <4 x i32>* %B) { 329; CHECK-LABEL: tw_h_flop: 330; CHECK: # %bb.0: 331; CHECK-NEXT: lvx 2, 0, 3 332; CHECK-NEXT: lvx 3, 0, 4 333; CHECK-NEXT: vmrghw 2, 2, 3 334; CHECK-NEXT: stvx 2, 0, 3 335; CHECK-NEXT: blr 336 %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=2] 337 %tmp2 = load <4 x i32>, <4 x i32>* %B ; <<4 x i32>> [#uses=2] 338 %tmp.upgrd.18 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1] 339 %tmp3 = extractelement <4 x i32> %tmp2, i32 0 ; <i32> [#uses=1] 340 %tmp4 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1] 341 %tmp5 = extractelement <4 x i32> %tmp2, i32 1 ; <i32> [#uses=1] 342 %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.18, i32 0 ; <<4 x 
i32>> [#uses=1] 343 %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] 344 %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1] 345 %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1] 346 store <4 x i32> %tmp9, <4 x i32>* %A 347 ret void 348} 349 350define void @VMRG_UNARY_tb_l(<16 x i8>* %A, <16 x i8>* %B) { 351; CHECK-LABEL: VMRG_UNARY_tb_l: 352; CHECK: # %bb.0: # %entry 353; CHECK-NEXT: lvx 2, 0, 3 354; CHECK-NEXT: vmrglb 2, 2, 2 355; CHECK-NEXT: stvx 2, 0, 3 356; CHECK-NEXT: blr 357entry: 358 %tmp = load <16 x i8>, <16 x i8>* %A ; <<16 x i8>> [#uses=16] 359 %tmp.upgrd.19 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1] 360 %tmp3 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1] 361 %tmp4 = extractelement <16 x i8> %tmp, i32 9 ; <i8> [#uses=1] 362 %tmp5 = extractelement <16 x i8> %tmp, i32 9 ; <i8> [#uses=1] 363 %tmp6 = extractelement <16 x i8> %tmp, i32 10 ; <i8> [#uses=1] 364 %tmp7 = extractelement <16 x i8> %tmp, i32 10 ; <i8> [#uses=1] 365 %tmp8 = extractelement <16 x i8> %tmp, i32 11 ; <i8> [#uses=1] 366 %tmp9 = extractelement <16 x i8> %tmp, i32 11 ; <i8> [#uses=1] 367 %tmp10 = extractelement <16 x i8> %tmp, i32 12 ; <i8> [#uses=1] 368 %tmp11 = extractelement <16 x i8> %tmp, i32 12 ; <i8> [#uses=1] 369 %tmp12 = extractelement <16 x i8> %tmp, i32 13 ; <i8> [#uses=1] 370 %tmp13 = extractelement <16 x i8> %tmp, i32 13 ; <i8> [#uses=1] 371 %tmp14 = extractelement <16 x i8> %tmp, i32 14 ; <i8> [#uses=1] 372 %tmp15 = extractelement <16 x i8> %tmp, i32 14 ; <i8> [#uses=1] 373 %tmp16 = extractelement <16 x i8> %tmp, i32 15 ; <i8> [#uses=1] 374 %tmp17 = extractelement <16 x i8> %tmp, i32 15 ; <i8> [#uses=1] 375 %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.19, i32 0 ; <<16 x i8>> [#uses=1] 376 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1] 377 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1] 378 
%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1] 379 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1] 380 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1] 381 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1] 382 %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1] 383 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1] 384 %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1] 385 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1] 386 %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1] 387 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1] 388 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1] 389 %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1] 390 %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1] 391 store <16 x i8> %tmp33, <16 x i8>* %A 392 ret void 393} 394 395define void @VMRG_UNARY_th_l(<8 x i16>* %A, <8 x i16>* %B) { 396; CHECK-LABEL: VMRG_UNARY_th_l: 397; CHECK: # %bb.0: # %entry 398; CHECK-NEXT: lvx 2, 0, 3 399; CHECK-NEXT: vmrglh 2, 2, 2 400; CHECK-NEXT: stvx 2, 0, 3 401; CHECK-NEXT: blr 402entry: 403 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=8] 404 %tmp.upgrd.20 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1] 405 %tmp3 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1] 406 %tmp4 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1] 407 %tmp5 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1] 408 %tmp6 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1] 409 %tmp7 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1] 410 %tmp8 = extractelement <8 x i16> %tmp, i32 7 ; <i16> [#uses=1] 411 %tmp9 = extractelement 
<8 x i16> %tmp, i32 7 ; <i16> [#uses=1] 412 %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.20, i32 0 ; <<8 x i16>> [#uses=1] 413 %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1] 414 %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1] 415 %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1] 416 %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1] 417 %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1] 418 %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1] 419 %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1] 420 store <8 x i16> %tmp17, <8 x i16>* %A 421 ret void 422} 423 424define void @VMRG_UNARY_tw_l(<4 x i32>* %A, <4 x i32>* %B) { 425; CHECK-LABEL: VMRG_UNARY_tw_l: 426; CHECK: # %bb.0: # %entry 427; CHECK-NEXT: lvx 2, 0, 3 428; CHECK-NEXT: vmrglw 2, 2, 2 429; CHECK-NEXT: stvx 2, 0, 3 430; CHECK-NEXT: blr 431entry: 432 %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=4] 433 %tmp.upgrd.21 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1] 434 %tmp3 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1] 435 %tmp4 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1] 436 %tmp5 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1] 437 %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.21, i32 0 ; <<4 x i32>> [#uses=1] 438 %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] 439 %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1] 440 %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1] 441 store <4 x i32> %tmp9, <4 x i32>* %A 442 ret void 443} 444 445define void @VMRG_UNARY_tb_h(<16 x i8>* %A, <16 x i8>* %B) { 446; CHECK-LABEL: VMRG_UNARY_tb_h: 447; CHECK: # %bb.0: # %entry 448; CHECK-NEXT: lvx 2, 0, 3 449; CHECK-NEXT: vmrghb 2, 2, 2 450; 
CHECK-NEXT: stvx 2, 0, 3 451; CHECK-NEXT: blr 452entry: 453 %tmp = load <16 x i8>, <16 x i8>* %A ; <<16 x i8>> [#uses=16] 454 %tmp.upgrd.22 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1] 455 %tmp3 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1] 456 %tmp4 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1] 457 %tmp5 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1] 458 %tmp6 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1] 459 %tmp7 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1] 460 %tmp8 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1] 461 %tmp9 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1] 462 %tmp10 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1] 463 %tmp11 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1] 464 %tmp12 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1] 465 %tmp13 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1] 466 %tmp14 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1] 467 %tmp15 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1] 468 %tmp16 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1] 469 %tmp17 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1] 470 %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.22, i32 0 ; <<16 x i8>> [#uses=1] 471 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1] 472 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1] 473 %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1] 474 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1] 475 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1] 476 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1] 477 %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1] 478 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1] 479 %tmp27 = insertelement <16 x 
i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1] 480 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1] 481 %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1] 482 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1] 483 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1] 484 %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1] 485 %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1] 486 store <16 x i8> %tmp33, <16 x i8>* %A 487 ret void 488} 489 490define void @VMRG_UNARY_th_h(<8 x i16>* %A, <8 x i16>* %B) { 491; CHECK-LABEL: VMRG_UNARY_th_h: 492; CHECK: # %bb.0: # %entry 493; CHECK-NEXT: lvx 2, 0, 3 494; CHECK-NEXT: vmrghh 2, 2, 2 495; CHECK-NEXT: stvx 2, 0, 3 496; CHECK-NEXT: blr 497entry: 498 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=8] 499 %tmp.upgrd.23 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1] 500 %tmp3 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1] 501 %tmp4 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1] 502 %tmp5 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1] 503 %tmp6 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1] 504 %tmp7 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1] 505 %tmp8 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1] 506 %tmp9 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1] 507 %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.23, i32 0 ; <<8 x i16>> [#uses=1] 508 %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1] 509 %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1] 510 %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1] 511 %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1] 512 %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; 
<<8 x i16>> [#uses=1] 513 %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1] 514 %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1] 515 store <8 x i16> %tmp17, <8 x i16>* %A 516 ret void 517} 518 519define void @VMRG_UNARY_tw_h(<4 x i32>* %A, <4 x i32>* %B) { 520; CHECK-LABEL: VMRG_UNARY_tw_h: 521; CHECK: # %bb.0: # %entry 522; CHECK-NEXT: lvx 2, 0, 3 523; CHECK-NEXT: vmrghw 2, 2, 2 524; CHECK-NEXT: stvx 2, 0, 3 525; CHECK-NEXT: blr 526entry: 527 %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=4] 528 %tmp.upgrd.24 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1] 529 %tmp3 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1] 530 %tmp4 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1] 531 %tmp5 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1] 532 %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.24, i32 0 ; <<4 x i32>> [#uses=1] 533 %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1] 534 %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1] 535 %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1] 536 store <4 x i32> %tmp9, <4 x i32>* %A 537 ret void 538} 539 540define void @VPCKUHUM_unary(<8 x i16>* %A, <8 x i16>* %B) { 541; CHECK-LABEL: VPCKUHUM_unary: 542; CHECK: # %bb.0: # %entry 543; CHECK-NEXT: lvx 2, 0, 3 544; CHECK-NEXT: vpkuhum 2, 2, 2 545; CHECK-NEXT: stvx 2, 0, 3 546; CHECK-NEXT: blr 547entry: 548 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=2] 549 %tmp.upgrd.25 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=8] 550 %tmp3 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=8] 551 %tmp.upgrd.26 = extractelement <16 x i8> %tmp.upgrd.25, i32 1 ; <i8> [#uses=1] 552 %tmp4 = extractelement <16 x i8> %tmp.upgrd.25, i32 3 ; <i8> [#uses=1] 553 %tmp5 = extractelement <16 x i8> %tmp.upgrd.25, i32 5 ; <i8> [#uses=1] 554 %tmp6 = extractelement <16 x i8> 
%tmp.upgrd.25, i32 7 ; <i8> [#uses=1] 555 %tmp7 = extractelement <16 x i8> %tmp.upgrd.25, i32 9 ; <i8> [#uses=1] 556 %tmp8 = extractelement <16 x i8> %tmp.upgrd.25, i32 11 ; <i8> [#uses=1] 557 %tmp9 = extractelement <16 x i8> %tmp.upgrd.25, i32 13 ; <i8> [#uses=1] 558 %tmp10 = extractelement <16 x i8> %tmp.upgrd.25, i32 15 ; <i8> [#uses=1] 559 %tmp11 = extractelement <16 x i8> %tmp3, i32 1 ; <i8> [#uses=1] 560 %tmp12 = extractelement <16 x i8> %tmp3, i32 3 ; <i8> [#uses=1] 561 %tmp13 = extractelement <16 x i8> %tmp3, i32 5 ; <i8> [#uses=1] 562 %tmp14 = extractelement <16 x i8> %tmp3, i32 7 ; <i8> [#uses=1] 563 %tmp15 = extractelement <16 x i8> %tmp3, i32 9 ; <i8> [#uses=1] 564 %tmp16 = extractelement <16 x i8> %tmp3, i32 11 ; <i8> [#uses=1] 565 %tmp17 = extractelement <16 x i8> %tmp3, i32 13 ; <i8> [#uses=1] 566 %tmp18 = extractelement <16 x i8> %tmp3, i32 15 ; <i8> [#uses=1] 567 %tmp19 = insertelement <16 x i8> undef, i8 %tmp.upgrd.26, i32 0 ; <<16 x i8>> [#uses=1] 568 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 1 ; <<16 x i8>> [#uses=1] 569 %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 2 ; <<16 x i8>> [#uses=1] 570 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 3 ; <<16 x i8>> [#uses=1] 571 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 4 ; <<16 x i8>> [#uses=1] 572 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 5 ; <<16 x i8>> [#uses=1] 573 %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 6 ; <<16 x i8>> [#uses=1] 574 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 7 ; <<16 x i8>> [#uses=1] 575 %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 8 ; <<16 x i8>> [#uses=1] 576 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 9 ; <<16 x i8>> [#uses=1] 577 %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 10 ; <<16 x i8>> [#uses=1] 578 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 11 ; <<16 x i8>> [#uses=1] 579 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 12 ; <<16 x i8>> 
; [#uses=1]
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 13 ; <<16 x i8>> [#uses=1]
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 14 ; <<16 x i8>> [#uses=1]
  %tmp34 = insertelement <16 x i8> %tmp33, i8 %tmp18, i32 15 ; <<16 x i8>> [#uses=1]
  %tmp34.upgrd.27 = bitcast <16 x i8> %tmp34 to <8 x i16> ; <<8 x i16>> [#uses=1]
  store <8 x i16> %tmp34.upgrd.27, <8 x i16>* %A
  ret void
}

; Unary word-to-halfword pack: both inputs of the shuffle are the same vector.
; The <4 x i32> loaded from %A is viewed twice as <8 x i16>; halfwords 1, 3, 5
; and 7 of the first view fill result lanes 0-3, and the same halfwords of the
; second view fill lanes 4-7. The result is stored back through %A.
; The autogenerated assertions expect a single vpkuwum whose two source
; operands are the same register. %B is not referenced by the body.
define void @VPCKUWUM_unary(<4 x i32>* %A, <4 x i32>* %B) {
; CHECK-LABEL: VPCKUWUM_unary:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lvx 2, 0, 3
; CHECK-NEXT: vpkuwum 2, 2, 2
; CHECK-NEXT: stvx 2, 0, 3
; CHECK-NEXT: blr
entry:
  ; One load, reinterpreted twice as eight halfwords.
  %vec = load <4 x i32>, <4 x i32>* %A
  %lhs.h = bitcast <4 x i32> %vec to <8 x i16>
  %rhs.h = bitcast <4 x i32> %vec to <8 x i16>

  ; Odd-indexed halfwords of the first view.
  %lhs.e1 = extractelement <8 x i16> %lhs.h, i32 1
  %lhs.e3 = extractelement <8 x i16> %lhs.h, i32 3
  %lhs.e5 = extractelement <8 x i16> %lhs.h, i32 5
  %lhs.e7 = extractelement <8 x i16> %lhs.h, i32 7

  ; Odd-indexed halfwords of the second view (same underlying data).
  %rhs.e1 = extractelement <8 x i16> %rhs.h, i32 1
  %rhs.e3 = extractelement <8 x i16> %rhs.h, i32 3
  %rhs.e5 = extractelement <8 x i16> %rhs.h, i32 5
  %rhs.e7 = extractelement <8 x i16> %rhs.h, i32 7

  ; Rebuild the result lane by lane: lanes 0-3 from the first view,
  ; lanes 4-7 from the second.
  %pack.0 = insertelement <8 x i16> undef, i16 %lhs.e1, i32 0
  %pack.1 = insertelement <8 x i16> %pack.0, i16 %lhs.e3, i32 1
  %pack.2 = insertelement <8 x i16> %pack.1, i16 %lhs.e5, i32 2
  %pack.3 = insertelement <8 x i16> %pack.2, i16 %lhs.e7, i32 3
  %pack.4 = insertelement <8 x i16> %pack.3, i16 %rhs.e1, i32 4
  %pack.5 = insertelement <8 x i16> %pack.4, i16 %rhs.e3, i32 5
  %pack.6 = insertelement <8 x i16> %pack.5, i16 %rhs.e5, i32 6
  %pack.7 = insertelement <8 x i16> %pack.6, i16 %rhs.e7, i32 7

  ; Back to the pointer's element type and overwrite the input in place.
  %result = bitcast <8 x i16> %pack.7 to <4 x i32>
  store <4 x i32> %result, <4 x i32>* %A
  ret void
}