1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 6 7; 8; PR6455 'Clear Upper Bits' Patterns 9; 10 11define <2 x i64> @_clearupper2xi64a(<2 x i64>) nounwind { 12; SSE2-LABEL: _clearupper2xi64a: 13; SSE2: # %bb.0: 14; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 15; SSE2-NEXT: retq 16; 17; SSE42-LABEL: _clearupper2xi64a: 18; SSE42: # %bb.0: 19; SSE42-NEXT: xorps %xmm1, %xmm1 20; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 21; SSE42-NEXT: retq 22; 23; AVX-LABEL: _clearupper2xi64a: 24; AVX: # %bb.0: 25; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 26; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 27; AVX-NEXT: retq 28 %x0 = extractelement <2 x i64> %0, i32 0 29 %x1 = extractelement <2 x i64> %0, i32 1 30 %trunc0 = trunc i64 %x0 to i32 31 %trunc1 = trunc i64 %x1 to i32 32 %ext0 = zext i32 %trunc0 to i64 33 %ext1 = zext i32 %trunc1 to i64 34 %v0 = insertelement <2 x i64> undef, i64 %ext0, i32 0 35 %v1 = insertelement <2 x i64> %v0, i64 %ext1, i32 1 36 ret <2 x i64> %v1 37} 38 39define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind { 40; SSE2-LABEL: _clearupper4xi64a: 41; SSE2: # %bb.0: 42; SSE2-NEXT: movaps {{.*#+}} xmm2 = [4294967295,4294967295] 43; SSE2-NEXT: andps %xmm2, %xmm0 44; SSE2-NEXT: andps %xmm2, %xmm1 45; SSE2-NEXT: retq 46; 47; SSE42-LABEL: _clearupper4xi64a: 48; SSE42: # %bb.0: 49; SSE42-NEXT: xorps %xmm2, %xmm2 50; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] 51; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 52; SSE42-NEXT: retq 53; 54; AVX-LABEL: _clearupper4xi64a: 55; AVX: # %bb.0: 56; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 57; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 58; AVX-NEXT: retq 59 %x0 = extractelement <4 x i64> %0, i32 0 60 %x1 = extractelement <4 x i64> %0, i32 1 61 %x2 = extractelement <4 x i64> %0, i32 2 62 %x3 = extractelement <4 x i64> %0, i32 3 63 %trunc0 = trunc i64 %x0 to i32 64 %trunc1 = trunc i64 %x1 to i32 65 %trunc2 = trunc i64 %x2 to i32 66 %trunc3 = trunc i64 %x3 to i32 67 %ext0 = zext i32 %trunc0 to i64 68 %ext1 = zext i32 %trunc1 to i64 69 %ext2 = zext i32 %trunc2 to i64 70 %ext3 = zext i32 %trunc3 to i64 71 %v0 = insertelement <4 x i64> undef, i64 %ext0, i32 0 72 %v1 = insertelement <4 x i64> %v0, i64 %ext1, i32 1 73 %v2 = insertelement <4 x i64> %v1, i64 %ext2, i32 2 74 %v3 = insertelement <4 x i64> %v2, i64 %ext3, i32 3 75 ret <4 x i64> %v3 76} 77 78define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind { 79; SSE2-LABEL: _clearupper4xi32a: 80; SSE2: # %bb.0: 81; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 82; SSE2-NEXT: retq 83; 84; SSE42-LABEL: _clearupper4xi32a: 85; SSE42: # %bb.0: 86; SSE42-NEXT: pxor %xmm1, %xmm1 87; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 88; SSE42-NEXT: retq 89; 90; AVX-LABEL: _clearupper4xi32a: 91; AVX: # %bb.0: 92; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 93; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 94; AVX-NEXT: retq 95 %x0 = extractelement <4 x i32> %0, i32 0 96 %x1 = extractelement <4 x i32> %0, i32 1 97 %x2 = extractelement <4 x i32> %0, i32 2 98 %x3 = extractelement <4 x i32> %0, i32 3 99 %trunc0 = trunc i32 %x0 to i16 100 %trunc1 = trunc i32 %x1 to i16 101 %trunc2 = trunc i32 %x2 to i16 102 %trunc3 = trunc i32 %x3 to i16 103 %ext0 = zext i16 %trunc0 to i32 104 %ext1 = zext i16 %trunc1 to i32 105 %ext2 = zext i16 %trunc2 to i32 106 %ext3 = zext i16 %trunc3 to i32 107 %v0 = insertelement <4 x i32> undef, i32 %ext0, i32 0 108 %v1 = insertelement <4 x i32> %v0, i32 %ext1, i32 1 109 %v2 = insertelement <4 x i32> %v1, i32 %ext2, i32 2 110 %v3 = insertelement <4 x i32> %v2, i32 %ext3, i32 3 111 ret <4 x i32> %v3 112} 113 114define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind { 115; SSE2-LABEL: _clearupper8xi32a: 116; SSE2: # %bb.0: 117; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,65535,65535,65535] 118; SSE2-NEXT: andps %xmm2, %xmm0 119; SSE2-NEXT: andps %xmm2, %xmm1 120; SSE2-NEXT: retq 121; 122; SSE42-LABEL: _clearupper8xi32a: 123; SSE42: # %bb.0: 124; SSE42-NEXT: pxor %xmm2, %xmm2 125; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] 126; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] 127; SSE42-NEXT: retq 128; 129; AVX1-LABEL: _clearupper8xi32a: 130; AVX1: # %bb.0: 131; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 132; AVX1-NEXT: retq 133; 134; AVX2-LABEL: _clearupper8xi32a: 135; AVX2: # %bb.0: 136; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 137; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 138; AVX2-NEXT: retq 139 %x0 = extractelement <8 x i32> %0, i32 0 140 %x1 = extractelement <8 x i32> %0, i32 1 141 %x2 = extractelement <8 x i32> %0, i32 2 142 %x3 = extractelement <8 x i32> %0, i32 3 143 %x4 = extractelement <8 x i32> %0, i32 4 144 %x5 = extractelement <8 x i32> %0, i32 5 145 %x6 = extractelement <8 x i32> %0, i32 6 146 %x7 = extractelement <8 x i32> %0, i32 7 147 %trunc0 = trunc i32 %x0 to i16 148 %trunc1 = trunc i32 %x1 to i16 149 %trunc2 = trunc i32 %x2 to i16 150 %trunc3 = trunc i32 %x3 to i16 151 %trunc4 = trunc i32 %x4 to i16 152 %trunc5 = trunc i32 %x5 to i16 153 %trunc6 = trunc i32 %x6 to i16 154 %trunc7 = trunc i32 %x7 to i16 155 %ext0 = zext i16 %trunc0 to i32 156 %ext1 = zext i16 %trunc1 to i32 157 %ext2 = zext i16 %trunc2 to i32 158 %ext3 = zext i16 %trunc3 to i32 159 %ext4 = zext i16 %trunc4 to i32 160 %ext5 = zext i16 %trunc5 to i32 161 %ext6 = zext i16 %trunc6 to i32 162 %ext7 = zext i16 %trunc7 to i32 163 %v0 = insertelement <8 x i32> undef, i32 %ext0, i32 0 164 %v1 = insertelement <8 x i32> %v0, i32 %ext1, i32 1 165 %v2 = insertelement <8 x i32> %v1, i32 %ext2, i32 2 166 %v3 = insertelement <8 x i32> %v2, i32 %ext3, i32 3 167 %v4 = insertelement <8 x i32> %v3, i32 %ext4, i32 4 168 %v5 = insertelement <8 x i32> %v4, i32 %ext5, i32 5 169 %v6 = insertelement <8 x i32> %v5, i32 %ext6, i32 6 170 %v7 = insertelement <8 x i32> %v6, i32 %ext7, i32 7 171 ret <8 x i32> %v7 172} 173 174define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind { 175; SSE-LABEL: _clearupper8xi16a: 176; SSE: # %bb.0: 177; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 178; SSE-NEXT: retq 179; 180; AVX-LABEL: _clearupper8xi16a: 181; AVX: # %bb.0: 182; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 183; AVX-NEXT: retq 184 %x0 = extractelement <8 x i16> %0, i32 0 185 %x1 = extractelement <8 x i16> %0, i32 1 186 %x2 = extractelement <8 x i16> %0, i32 2 187 %x3 = extractelement <8 x i16> %0, i32 3 188 %x4 = extractelement <8 x i16> %0, i32 4 189 %x5 = extractelement <8 x i16> %0, i32 5 190 %x6 = extractelement <8 x i16> %0, i32 6 191 %x7 = extractelement <8 x i16> %0, i32 7 192 %trunc0 = trunc i16 %x0 to i8 193 %trunc1 = trunc i16 %x1 to i8 194 %trunc2 = trunc i16 %x2 to i8 195 %trunc3 = trunc i16 %x3 to i8 196 %trunc4 = trunc i16 %x4 to i8 197 %trunc5 = trunc i16 %x5 to i8 198 %trunc6 = trunc i16 %x6 to i8 199 %trunc7 = trunc i16 %x7 to i8 200 %ext0 = zext i8 %trunc0 to i16 201 %ext1 = zext i8 %trunc1 to i16 202 %ext2 = zext i8 %trunc2 to i16 203 %ext3 = zext i8 %trunc3 to i16 204 %ext4 = zext i8 %trunc4 to i16 205 %ext5 = zext i8 %trunc5 to i16 206 %ext6 = zext i8 %trunc6 to i16 207 %ext7 = zext i8 %trunc7 to i16 208 %v0 = insertelement <8 x i16> undef, i16 %ext0, i32 0 209 %v1 = insertelement <8 x i16> %v0, i16 %ext1, i32 1 210 %v2 = insertelement <8 x i16> %v1, i16 %ext2, i32 2 211 %v3 = insertelement <8 x i16> %v2, i16 %ext3, i32 3 212 %v4 = insertelement <8 x i16> %v3, i16 %ext4, i32 4 213 %v5 = insertelement <8 x i16> %v4, i16 %ext5, i32 5 214 %v6 = insertelement <8 x i16> %v5, i16 %ext6, i32 6 215 %v7 = insertelement <8 x i16> %v6, i16 %ext7, i32 7 216 ret <8 x i16> %v7 217} 218 219define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind { 220; SSE-LABEL: _clearupper16xi16a: 221; SSE: # %bb.0: 222; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 223; SSE-NEXT: andps %xmm2, %xmm0 224; SSE-NEXT: andps %xmm2, %xmm1 225; SSE-NEXT: retq 226; 227; AVX-LABEL: _clearupper16xi16a: 228; AVX: # %bb.0: 229; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 230; AVX-NEXT: retq 231 %x0 = extractelement <16 x i16> %0, i32 0 232 %x1 = extractelement <16 x i16> %0, i32 1 233 %x2 = extractelement <16 x i16> %0, i32 2 234 %x3 = extractelement <16 x i16> %0, i32 3 235 %x4 = extractelement <16 x i16> %0, i32 4 236 %x5 = extractelement <16 x i16> %0, i32 5 237 %x6 = extractelement <16 x i16> %0, i32 6 238 %x7 = extractelement <16 x i16> %0, i32 7 239 %x8 = extractelement <16 x i16> %0, i32 8 240 %x9 = extractelement <16 x i16> %0, i32 9 241 %x10 = extractelement <16 x i16> %0, i32 10 242 %x11 = extractelement <16 x i16> %0, i32 11 243 %x12 = extractelement <16 x i16> %0, i32 12 244 %x13 = extractelement <16 x i16> %0, i32 13 245 %x14 = extractelement <16 x i16> %0, i32 14 246 %x15 = extractelement <16 x i16> %0, i32 15 247 %trunc0 = trunc i16 %x0 to i8 248 %trunc1 = trunc i16 %x1 to i8 249 %trunc2 = trunc i16 %x2 to i8 250 %trunc3 = trunc i16 %x3 to i8 251 %trunc4 = trunc i16 %x4 to i8 252 %trunc5 = trunc i16 %x5 to i8 253 %trunc6 = trunc i16 %x6 to i8 254 %trunc7 = trunc i16 %x7 to i8 255 %trunc8 = trunc i16 %x8 to i8 256 %trunc9 = trunc i16 %x9 to i8 257 %trunc10 = trunc i16 %x10 to i8 258 %trunc11 = trunc i16 %x11 to i8 259 %trunc12 = trunc i16 %x12 to i8 260 %trunc13 = trunc i16 %x13 to i8 261 %trunc14 = trunc i16 %x14 to i8 262 %trunc15 = trunc i16 %x15 to i8 263 %ext0 = zext i8 %trunc0 to i16 264 %ext1 = zext i8 %trunc1 to i16 265 %ext2 = zext i8 %trunc2 to i16 266 %ext3 = zext i8 %trunc3 to i16 267 %ext4 = zext i8 %trunc4 to i16 268 %ext5 = zext i8 %trunc5 to i16 269 %ext6 = zext i8 %trunc6 to i16 270 %ext7 = zext i8 %trunc7 to i16 271 %ext8 = zext i8 %trunc8 to i16 272 %ext9 = zext i8 %trunc9 to i16 273 %ext10 = zext i8 %trunc10 to i16 274 %ext11 = zext i8 %trunc11 to i16 275 %ext12 = zext i8 %trunc12 to i16 276 %ext13 = zext i8 %trunc13 to i16 277 %ext14 = zext i8 %trunc14 to i16 278 %ext15 = zext i8 %trunc15 to i16 279 %v0 = insertelement <16 x i16> undef, i16 %ext0, i32 0 280 %v1 = insertelement <16 x i16> %v0, i16 %ext1, i32 1 281 %v2 = insertelement <16 x i16> %v1, i16 %ext2, i32 2 282 %v3 = insertelement <16 x i16> %v2, i16 %ext3, i32 3 283 %v4 = insertelement <16 x i16> %v3, i16 %ext4, i32 4 284 %v5 = insertelement <16 x i16> %v4, i16 %ext5, i32 5 285 %v6 = insertelement <16 x i16> %v5, i16 %ext6, i32 6 286 %v7 = insertelement <16 x i16> %v6, i16 %ext7, i32 7 287 %v8 = insertelement <16 x i16> %v7, i16 %ext8, i32 8 288 %v9 = insertelement <16 x i16> %v8, i16 %ext9, i32 9 289 %v10 = insertelement <16 x i16> %v9, i16 %ext10, i32 10 290 %v11 = insertelement <16 x i16> %v10, i16 %ext11, i32 11 291 %v12 = insertelement <16 x i16> %v11, i16 %ext12, i32 12 292 %v13 = insertelement <16 x i16> %v12, i16 %ext13, i32 13 293 %v14 = insertelement <16 x i16> %v13, i16 %ext14, i32 14 294 %v15 = insertelement <16 x i16> %v14, i16 %ext15, i32 15 295 ret <16 x i16> %v15 296} 297 298define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind { 299; SSE-LABEL: _clearupper16xi8a: 300; SSE: # %bb.0: 301; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 302; SSE-NEXT: retq 303; 304; AVX-LABEL: _clearupper16xi8a: 305; AVX: # %bb.0: 306; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 307; AVX-NEXT: retq 308 %x0 = extractelement <16 x i8> %0, i32 0 309 %x1 = extractelement <16 x i8> %0, i32 1 310 %x2 = extractelement <16 x i8> %0, i32 2 311 %x3 = extractelement <16 x i8> %0, i32 3 312 %x4 = extractelement <16 x i8> %0, i32 4 313 %x5 = extractelement <16 x i8> %0, i32 5 314 %x6 = extractelement <16 x i8> %0, i32 6 315 %x7 = extractelement <16 x i8> %0, i32 7 316 %x8 = extractelement <16 x i8> %0, i32 8 317 %x9 = extractelement <16 x i8> %0, i32 9 318 %x10 = extractelement <16 x i8> %0, i32 10 319 %x11 = extractelement <16 x i8> %0, i32 11 320 %x12 = extractelement <16 x i8> %0, i32 12 321 %x13 = extractelement <16 x i8> %0, i32 13 322 %x14 = extractelement <16 x i8> %0, i32 14 323 %x15 = extractelement <16 x i8> %0, i32 15 324 %trunc0 = trunc i8 %x0 to i4 325 %trunc1 = trunc i8 %x1 to i4 326 %trunc2 = trunc i8 %x2 to i4 327 %trunc3 = trunc i8 %x3 to i4 328 %trunc4 = trunc i8 %x4 to i4 329 %trunc5 = trunc i8 %x5 to i4 330 %trunc6 = trunc i8 %x6 to i4 331 %trunc7 = trunc i8 %x7 to i4 332 %trunc8 = trunc i8 %x8 to i4 333 %trunc9 = trunc i8 %x9 to i4 334 %trunc10 = trunc i8 %x10 to i4 335 %trunc11 = trunc i8 %x11 to i4 336 %trunc12 = trunc i8 %x12 to i4 337 %trunc13 = trunc i8 %x13 to i4 338 %trunc14 = trunc i8 %x14 to i4 339 %trunc15 = trunc i8 %x15 to i4 340 %ext0 = zext i4 %trunc0 to i8 341 %ext1 = zext i4 %trunc1 to i8 342 %ext2 = zext i4 %trunc2 to i8 343 %ext3 = zext i4 %trunc3 to i8 344 %ext4 = zext i4 %trunc4 to i8 345 %ext5 = zext i4 %trunc5 to i8 346 %ext6 = zext i4 %trunc6 to i8 347 %ext7 = zext i4 %trunc7 to i8 348 %ext8 = zext i4 %trunc8 to i8 349 %ext9 = zext i4 %trunc9 to i8 350 %ext10 = zext i4 %trunc10 to i8 351 %ext11 = zext i4 %trunc11 to i8 352 %ext12 = zext i4 %trunc12 to i8 353 %ext13 = zext i4 %trunc13 to i8 354 %ext14 = zext i4 %trunc14 to i8 355 %ext15 = zext i4 %trunc15 to i8 356 %v0 = insertelement <16 x i8> undef, i8 %ext0, i32 0 357 %v1 = insertelement <16 x i8> %v0, i8 %ext1, i32 1 358 %v2 = insertelement <16 x i8> %v1, i8 %ext2, i32 2 359 %v3 = insertelement <16 x i8> %v2, i8 %ext3, i32 3 360 %v4 = insertelement <16 x i8> %v3, i8 %ext4, i32 4 361 %v5 = insertelement <16 x i8> %v4, i8 %ext5, i32 5 362 %v6 = insertelement <16 x i8> %v5, i8 %ext6, i32 6 363 %v7 = insertelement <16 x i8> %v6, i8 %ext7, i32 7 364 %v8 = insertelement <16 x i8> %v7, i8 %ext8, i32 8 365 %v9 = insertelement <16 x i8> %v8, i8 %ext9, i32 9 366 %v10 = insertelement <16 x i8> %v9, i8 %ext10, i32 10 367 %v11 = insertelement <16 x i8> %v10, i8 %ext11, i32 11 368 %v12 = insertelement <16 x i8> %v11, i8 %ext12, i32 12 369 %v13 = insertelement <16 x i8> %v12, i8 %ext13, i32 13 370 %v14 = insertelement <16 x i8> %v13, i8 %ext14, i32 14 371 %v15 = insertelement <16 x i8> %v14, i8 %ext15, i32 15 372 ret <16 x i8> %v15 373} 374 375define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind { 376; SSE-LABEL: _clearupper32xi8a: 377; SSE: # %bb.0: 378; SSE-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 379; SSE-NEXT: andps %xmm2, %xmm0 380; SSE-NEXT: andps %xmm2, %xmm1 381; SSE-NEXT: retq 382; 383; AVX-LABEL: _clearupper32xi8a: 384; AVX: # %bb.0: 385; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 386; AVX-NEXT: retq 387 %x0 = extractelement <32 x i8> %0, i32 0 388 %x1 = extractelement <32 x i8> %0, i32 1 389 %x2 = extractelement <32 x i8> %0, i32 2 390 %x3 = extractelement <32 x i8> %0, i32 3 391 %x4 = extractelement <32 x i8> %0, i32 4 392 %x5 = extractelement <32 x i8> %0, i32 5 393 %x6 = extractelement <32 x i8> %0, i32 6 394 %x7 = extractelement <32 x i8> %0, i32 7 395 %x8 = extractelement <32 x i8> %0, i32 8 396 %x9 = extractelement <32 x i8> %0, i32 9 397 %x10 = extractelement <32 x i8> %0, i32 10 398 %x11 = extractelement <32 x i8> %0, i32 11 399 %x12 = extractelement <32 x i8> %0, i32 12 400 %x13 = extractelement <32 x i8> %0, i32 13 401 %x14 = extractelement <32 x i8> %0, i32 14 402 %x15 = extractelement <32 x i8> %0, i32 15 403 %x16 = extractelement <32 x i8> %0, i32 16 404 %x17 = extractelement <32 x i8> %0, i32 17 405 %x18 = extractelement <32 x i8> %0, i32 18 406 %x19 = extractelement <32 x i8> %0, i32 19 407 %x20 = extractelement <32 x i8> %0, i32 20 408 %x21 = extractelement <32 x i8> %0, i32 21 409 %x22 = extractelement <32 x i8> %0, i32 22 410 %x23 = extractelement <32 x i8> %0, i32 23 411 %x24 = extractelement <32 x i8> %0, i32 24 412 %x25 = extractelement <32 x i8> %0, i32 25 413 %x26 = extractelement <32 x i8> %0, i32 26 414 %x27 = extractelement <32 x i8> %0, i32 27 415 %x28 = extractelement <32 x i8> %0, i32 28 416 %x29 = extractelement <32 x i8> %0, i32 29 417 %x30 = extractelement <32 x i8> %0, i32 30 418 %x31 = extractelement <32 x i8> %0, i32 31 419 %trunc0 = trunc i8 %x0 to i4 420 %trunc1 = trunc i8 %x1 to i4 421 %trunc2 = trunc i8 %x2 to i4 422 %trunc3 = trunc i8 %x3 to i4 423 %trunc4 = trunc i8 %x4 to i4 424 %trunc5 = trunc i8 %x5 to i4 425 %trunc6 = trunc i8 %x6 to i4 426 %trunc7 = trunc i8 %x7 to i4 427 %trunc8 = trunc i8 %x8 to i4 428 %trunc9 = trunc i8 %x9 to i4 429 %trunc10 = trunc i8 %x10 to i4 430 %trunc11 = trunc i8 %x11 to i4 431 %trunc12 = trunc i8 %x12 to i4 432 %trunc13 = trunc i8 %x13 to i4 433 %trunc14 = trunc i8 %x14 to i4 434 %trunc15 = trunc i8 %x15 to i4 435 %trunc16 = trunc i8 %x16 to i4 436 %trunc17 = trunc i8 %x17 to i4 437 %trunc18 = trunc i8 %x18 to i4 438 %trunc19 = trunc i8 %x19 to i4 439 %trunc20 = trunc i8 %x20 to i4 440 %trunc21 = trunc i8 %x21 to i4 441 %trunc22 = trunc i8 %x22 to i4 442 %trunc23 = trunc i8 %x23 to i4 443 %trunc24 = trunc i8 %x24 to i4 444 %trunc25 = trunc i8 %x25 to i4 445 %trunc26 = trunc i8 %x26 to i4 446 %trunc27 = trunc i8 %x27 to i4 447 %trunc28 = trunc i8 %x28 to i4 448 %trunc29 = trunc i8 %x29 to i4 449 %trunc30 = trunc i8 %x30 to i4 450 %trunc31 = trunc i8 %x31 to i4 451 %ext0 = zext i4 %trunc0 to i8 452 %ext1 = zext i4 %trunc1 to i8 453 %ext2 = zext i4 %trunc2 to i8 454 %ext3 = zext i4 %trunc3 to i8 455 %ext4 = zext i4 %trunc4 to i8 456 %ext5 = zext i4 %trunc5 to i8 457 %ext6 = zext i4 %trunc6 to i8 458 %ext7 = zext i4 %trunc7 to i8 459 %ext8 = zext i4 %trunc8 to i8 460 %ext9 = zext i4 %trunc9 to i8 461 %ext10 = zext i4 %trunc10 to i8 462 %ext11 = zext i4 %trunc11 to i8 463 %ext12 = zext i4 %trunc12 to i8 464 %ext13 = zext i4 %trunc13 to i8 465 %ext14 = zext i4 %trunc14 to i8 466 %ext15 = zext i4 %trunc15 to i8 467 %ext16 = zext i4 %trunc16 to i8 468 %ext17 = zext i4 %trunc17 to i8 469 %ext18 = zext i4 %trunc18 to i8 470 %ext19 = zext i4 %trunc19 to i8 471 %ext20 = zext i4 %trunc20 to i8 472 %ext21 = zext i4 %trunc21 to i8 473 %ext22 = zext i4 %trunc22 to i8 474 %ext23 = zext i4 %trunc23 to i8 475 %ext24 = zext i4 %trunc24 to i8 476 %ext25 = zext i4 %trunc25 to i8 477 %ext26 = zext i4 %trunc26 to i8 478 %ext27 = zext i4 %trunc27 to i8 479 %ext28 = zext i4 %trunc28 to i8 480 %ext29 = zext i4 %trunc29 to i8 481 %ext30 = zext i4 %trunc30 to i8 482 %ext31 = zext i4 %trunc31 to i8 483 %v0 = insertelement <32 x i8> undef, i8 %ext0, i32 0 484 %v1 = insertelement <32 x i8> %v0, i8 %ext1, i32 1 485 %v2 = insertelement <32 x i8> %v1, i8 %ext2, i32 2 486 %v3 = insertelement <32 x i8> %v2, i8 %ext3, i32 3 487 %v4 = insertelement <32 x i8> %v3, i8 %ext4, i32 4 488 %v5 = insertelement <32 x i8> %v4, i8 %ext5, i32 5 489 %v6 = insertelement <32 x i8> %v5, i8 %ext6, i32 6 490 %v7 = insertelement <32 x i8> %v6, i8 %ext7, i32 7 491 %v8 = insertelement <32 x i8> %v7, i8 %ext8, i32 8 492 %v9 = insertelement <32 x i8> %v8, i8 %ext9, i32 9 493 %v10 = insertelement <32 x i8> %v9, i8 %ext10, i32 10 494 %v11 = insertelement <32 x i8> %v10, i8 %ext11, i32 11 495 %v12 = insertelement <32 x i8> %v11, i8 %ext12, i32 12 496 %v13 = insertelement <32 x i8> %v12, i8 %ext13, i32 13 497 %v14 = insertelement <32 x i8> %v13, i8 %ext14, i32 14 498 %v15 = insertelement <32 x i8> %v14, i8 %ext15, i32 15 499 %v16 = insertelement <32 x i8> %v15, i8 %ext16, i32 16 500 %v17 = insertelement <32 x i8> %v16, i8 %ext17, i32 17 501 %v18 = insertelement <32 x i8> %v17, i8 %ext18, i32 18 502 %v19 = insertelement <32 x i8> %v18, i8 %ext19, i32 19 503 %v20 = insertelement <32 x i8> %v19, i8 %ext20, i32 20 504 %v21 = insertelement <32 x i8> %v20, i8 %ext21, i32 21 505 %v22 = insertelement <32 x i8> %v21, i8 %ext22, i32 22 506 %v23 = insertelement <32 x i8> %v22, i8 %ext23, i32 23 507 %v24 = insertelement <32 x i8> %v23, i8 %ext24, i32 24 508 %v25 = insertelement <32 x i8> %v24, i8 %ext25, i32 25 509 %v26 = insertelement <32 x i8> %v25, i8 %ext26, i32 26 510 %v27 = insertelement <32 x i8> %v26, i8 %ext27, i32 27 511 %v28 = insertelement <32 x i8> %v27, i8 %ext28, i32 28 512 %v29 = insertelement <32 x i8> %v28, i8 %ext29, i32 29 513 %v30 = insertelement <32 x i8> %v29, i8 %ext30, i32 30 514 %v31 = insertelement <32 x i8> %v30, i8 %ext31, i32 31 515 ret <32 x i8> %v31 516} 517 518define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind { 519; SSE2-LABEL: _clearupper2xi64b: 520; SSE2: # %bb.0: 521; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 522; SSE2-NEXT: retq 523; 524; SSE42-LABEL: _clearupper2xi64b: 525; SSE42: # %bb.0: 526; SSE42-NEXT: xorps %xmm1, %xmm1 527; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 528; SSE42-NEXT: retq 529; 530; AVX-LABEL: _clearupper2xi64b: 531; AVX: # %bb.0: 532; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 533; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 534; AVX-NEXT: retq 535 %x32 = bitcast <2 x i64> %0 to <4 x i32> 536 %r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1 537 %r1 = insertelement <4 x i32> %r0, i32 zeroinitializer, i32 3 538 %r = bitcast <4 x i32> %r1 to <2 x i64> 539 ret <2 x i64> %r 540} 541 542define <4 x i64> @_clearupper4xi64b(<4 x i64>) nounwind { 543; SSE2-LABEL: _clearupper4xi64b: 544; SSE2: # %bb.0: 545; SSE2-NEXT: movaps {{.*#+}} xmm2 = [NaN,0.0E+0,NaN,0.0E+0] 546; SSE2-NEXT: andps %xmm2, %xmm0 547; SSE2-NEXT: andps %xmm2, %xmm1 548; SSE2-NEXT: retq 549; 550; SSE42-LABEL: _clearupper4xi64b: 551; SSE42: # %bb.0: 552; SSE42-NEXT: xorps %xmm2, %xmm2 553; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] 554; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 555; SSE42-NEXT: retq 556; 557; AVX-LABEL: _clearupper4xi64b: 558; AVX: # %bb.0: 559; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 560; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 561; AVX-NEXT: retq 562 %x32 = bitcast <4 x i64> %0 to <8 x i32> 563 %r0 = insertelement <8 x i32> %x32, i32 zeroinitializer, i32 1 564 %r1 = insertelement <8 x i32> %r0, i32 zeroinitializer, i32 3 565 %r2 = insertelement <8 x i32> %r1, i32 zeroinitializer, i32 5 566 %r3 = insertelement <8 x i32> %r2, i32 zeroinitializer, i32 7 567 %r = bitcast <8 x i32> %r3 to <4 x i64> 568 ret <4 x i64> %r 569} 570 571define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind { 572; SSE2-LABEL: _clearupper4xi32b: 573; SSE2: # %bb.0: 574; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 575; SSE2-NEXT: retq 576; 577; SSE42-LABEL: _clearupper4xi32b: 578; SSE42: # %bb.0: 579; SSE42-NEXT: pxor %xmm1, %xmm1 580; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 581; SSE42-NEXT: retq 582; 583; AVX-LABEL: _clearupper4xi32b: 584; AVX: # %bb.0: 585; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 586; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 587; AVX-NEXT: retq 588 %x16 = bitcast <4 x i32> %0 to <8 x i16> 589 %r0 = insertelement <8 x i16> %x16, i16 zeroinitializer, i32 1 590 %r1 = insertelement <8 x i16> %r0, i16 zeroinitializer, i32 3 591 %r2 = insertelement <8 x i16> %r1, i16 zeroinitializer, i32 5 592 %r3 = insertelement <8 x i16> %r2, i16 zeroinitializer, i32 7 593 %r = bitcast <8 x i16> %r3 to <4 x i32> 594 ret <4 x i32> %r 595} 596 597define <8 x i32> @_clearupper8xi32b(<8 x i32>) nounwind { 598; SSE2-LABEL: _clearupper8xi32b: 599; SSE2: # %bb.0: 600; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0] 601; SSE2-NEXT: andps %xmm2, %xmm0 602; SSE2-NEXT: andps %xmm2, %xmm1 603; SSE2-NEXT: retq 604; 605; SSE42-LABEL: _clearupper8xi32b: 606; SSE42: # %bb.0: 607; SSE42-NEXT: pxor %xmm2, %xmm2 608; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] 609; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] 610; SSE42-NEXT: retq 611; 612; AVX1-LABEL: _clearupper8xi32b: 613; AVX1: # %bb.0: 614; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 615; AVX1-NEXT: retq 616; 617; AVX2-LABEL: _clearupper8xi32b: 618; AVX2: # %bb.0: 619; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 620; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 621; AVX2-NEXT: retq 622 %x16 = bitcast <8 x i32> %0 to <16 x i16> 623 %r0 = insertelement <16 x i16> %x16, i16 zeroinitializer, i32 1 624 %r1 = insertelement <16 x i16> %r0, i16 zeroinitializer, i32 3 625 %r2 = insertelement <16 x i16> %r1, i16 zeroinitializer, i32 5 626 %r3 = insertelement <16 x i16> %r2, i16 zeroinitializer, i32 7 627 %r4 = insertelement <16 x i16> %r3, i16 zeroinitializer, i32 9 628 %r5 = insertelement <16 x i16> %r4, i16 zeroinitializer, i32 11 629 %r6 = insertelement <16 x i16> %r5, i16 zeroinitializer, i32 13 630 %r7 = insertelement <16 x i16> %r6, i16 zeroinitializer, i32 15 631 %r = bitcast <16 x i16> %r7 to <8 x i32> 632 ret <8 x i32> %r 633} 634 635define <8 x i16> @_clearupper8xi16b(<8 x i16>) nounwind { 636; SSE-LABEL: _clearupper8xi16b: 637; SSE: # %bb.0: 638; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 639; SSE-NEXT: retq 640; 641; AVX-LABEL: _clearupper8xi16b: 642; AVX: # %bb.0: 643; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 644; AVX-NEXT: retq 645 %x8 = bitcast <8 x i16> %0 to <16 x i8> 646 %r0 = insertelement <16 x i8> %x8, i8 zeroinitializer, i32 1 647 %r1 = insertelement <16 x i8> %r0, i8 zeroinitializer, i32 3 648 %r2 = insertelement <16 x i8> %r1, i8 zeroinitializer, i32 5 649 %r3 = insertelement <16 x i8> %r2, i8 zeroinitializer, i32 7 650 %r4 = insertelement <16 x i8> %r3, i8 zeroinitializer, i32 9 651 %r5 = insertelement <16 x i8> %r4, i8 zeroinitializer, i32 11 652 %r6 = insertelement <16 x i8> %r5, i8 zeroinitializer, i32 13 653 %r7 = insertelement <16 x i8> %r6, i8 zeroinitializer, i32 15 654 %r = bitcast <16 x i8> %r7 to <8 x i16> 655 ret <8 x i16> %r 656} 657 658define <16 x i16> @_clearupper16xi16b(<16 x i16>) nounwind { 659; SSE-LABEL: _clearupper16xi16b: 660; SSE: # %bb.0: 661; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 662; SSE-NEXT: andps %xmm2, %xmm0 663; SSE-NEXT: andps %xmm2, %xmm1 664; SSE-NEXT: retq 665; 666; AVX-LABEL: _clearupper16xi16b: 667; AVX: # %bb.0: 668; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 669; AVX-NEXT: retq 670 %x8 = bitcast <16 x i16> %0 to <32 x i8> 671 %r0 = insertelement <32 x i8> %x8, i8 zeroinitializer, i32 1 672 %r1 = insertelement <32 x i8> %r0, i8 zeroinitializer, i32 3 673 %r2 = insertelement <32 x i8> %r1, i8 zeroinitializer, i32 5 674 %r3 = insertelement <32 x i8> %r2, i8 zeroinitializer, i32 7 675 %r4 = insertelement <32 x i8> %r3, i8 zeroinitializer, i32 9 676 %r5 = insertelement <32 x i8> %r4, i8 zeroinitializer, i32 11 677 %r6 = insertelement <32 x i8> %r5, i8 zeroinitializer, i32 13 678 %r7 = insertelement <32 x i8> %r6, i8 zeroinitializer, i32 15 679 %r8 = insertelement <32 x i8> %r7, i8 zeroinitializer, i32 17 680 %r9 = insertelement <32 x i8> %r8, i8 zeroinitializer, i32 19 681 %r10 = insertelement <32 x i8> %r9, i8 zeroinitializer, i32 21 682 %r11 = insertelement <32 x i8> %r10, i8 zeroinitializer, i32 23 683 %r12 = insertelement <32 x i8> %r11, i8 zeroinitializer, i32 25 684 %r13 = insertelement <32 x i8> %r12, i8 zeroinitializer, i32 27 685 %r14 = insertelement <32 x i8> %r13, i8 zeroinitializer, i32 29 686 %r15 = insertelement <32 x i8> %r14, i8 zeroinitializer, i32 31 687 %r = bitcast <32 x i8> %r15 to <16 x i16> 688 ret <16 x i16> %r 689} 690 691define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind { 692; SSE2-LABEL: _clearupper16xi8b: 693; SSE2: # %bb.0: 694; SSE2-NEXT: pushq %rbx 695; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 696; SSE2-NEXT: movq %xmm1, %r10 697; SSE2-NEXT: movq %r10, %r8 698; SSE2-NEXT: shrq $56, %r8 699; SSE2-NEXT: andl $15, %r8d 700; SSE2-NEXT: movq %r10, %r9 701; SSE2-NEXT: shrq $48, %r9 702; SSE2-NEXT: andl $15, %r9d 703; SSE2-NEXT: movq %r10, %rsi 704; SSE2-NEXT: shrq $40, %rsi 705; SSE2-NEXT: andl $15, %esi 706; SSE2-NEXT: movq %r10, %r11 707; SSE2-NEXT: shrq $32, %r11 708; SSE2-NEXT: andl $15, %r11d 709; SSE2-NEXT: movq %xmm0, %rax 710; SSE2-NEXT: movq %rax, %rdx 711; SSE2-NEXT: shrq $56, %rdx 712; SSE2-NEXT: andl $15, %edx 713; SSE2-NEXT: movq %rax, %rcx 714; SSE2-NEXT: shrq $48, %rcx 715; SSE2-NEXT: andl $15, %ecx 716; SSE2-NEXT: movq %rax, %rdi 717; SSE2-NEXT: shrq $40, %rdi 718; SSE2-NEXT: andl $15, %edi 719; SSE2-NEXT: movq %rax, %rbx 720; SSE2-NEXT: shrq $32, %rbx 721; SSE2-NEXT: andl $15, %ebx 722; SSE2-NEXT: shlq $32, %rbx 723; SSE2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 724; SSE2-NEXT: orq %rbx, %rax 725; SSE2-NEXT: shlq $40, %rdi 726; SSE2-NEXT: orq %rax, %rdi 727; SSE2-NEXT: shlq $48, %rcx 728; SSE2-NEXT: orq %rdi, %rcx 729; SSE2-NEXT: shlq $56, %rdx 730; SSE2-NEXT: orq %rcx, %rdx 731; SSE2-NEXT: shlq $32, %r11 732; SSE2-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F 733; SSE2-NEXT: orq %r11, %r10 734; SSE2-NEXT: shlq $40, %rsi 735; SSE2-NEXT: orq %r10, %rsi 736; SSE2-NEXT: shlq $48, %r9 737; SSE2-NEXT: orq %rsi, %r9 738; SSE2-NEXT: shlq $56, %r8 739; SSE2-NEXT: orq %r9, %r8 740; SSE2-NEXT: movq %rdx, %xmm0 741; SSE2-NEXT: movq %r8, %xmm1 742; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 743; SSE2-NEXT: popq %rbx 744; SSE2-NEXT: retq 745; 746; SSE42-LABEL: _clearupper16xi8b: 747; SSE42: # %bb.0: 748; SSE42-NEXT: pushq %rbx 749; SSE42-NEXT: pextrq $1, %xmm0, %r10 750; SSE42-NEXT: movq %r10, %r8 751; SSE42-NEXT: shrq $56, %r8 752; SSE42-NEXT: andl $15, %r8d 753; SSE42-NEXT: movq %r10, %r9 754; SSE42-NEXT: shrq $48, %r9 755; SSE42-NEXT: andl $15, %r9d 756; SSE42-NEXT: movq %r10, %rsi 757; SSE42-NEXT: shrq $40, %rsi 758; SSE42-NEXT: andl $15, %esi 759; SSE42-NEXT: movq %r10, %r11 760; SSE42-NEXT: shrq $32, %r11 761; SSE42-NEXT: andl $15, %r11d 762; SSE42-NEXT: movq %xmm0, %rax 763; SSE42-NEXT: movq %rax, %rdx 764; SSE42-NEXT: shrq $56, %rdx 765; SSE42-NEXT: andl $15, %edx 766; SSE42-NEXT: movq %rax, %rcx 767; SSE42-NEXT: shrq $48, %rcx 768; SSE42-NEXT: andl $15, %ecx 769; SSE42-NEXT: movq %rax, %rdi 770; SSE42-NEXT: shrq $40, %rdi 771; SSE42-NEXT: andl $15, %edi 772; SSE42-NEXT: movq %rax, %rbx 773; SSE42-NEXT: shrq $32, %rbx 774; SSE42-NEXT: andl $15, %ebx 775; SSE42-NEXT: shlq $32, %rbx 776; SSE42-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 777; SSE42-NEXT: orq %rbx, %rax 778; SSE42-NEXT: shlq $40, %rdi 779; SSE42-NEXT: orq %rax, %rdi 780; SSE42-NEXT: shlq $48, %rcx 781; SSE42-NEXT: orq %rdi, %rcx 782; SSE42-NEXT: shlq $56, %rdx 783; SSE42-NEXT: orq %rcx, %rdx 784; SSE42-NEXT: shlq $32, %r11 785; SSE42-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F 786; SSE42-NEXT: orq %r11, %r10 787; SSE42-NEXT: shlq $40, %rsi 788; SSE42-NEXT: orq %r10, %rsi 789; SSE42-NEXT: shlq $48, %r9 790; SSE42-NEXT: orq %rsi, %r9 791; SSE42-NEXT: shlq $56, %r8 792; SSE42-NEXT: orq %r9, %r8 793; SSE42-NEXT: movq %r8, %xmm1 794; SSE42-NEXT: movq %rdx, %xmm0 795; SSE42-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 796; SSE42-NEXT: popq %rbx 797; SSE42-NEXT: retq 798; 799; AVX-LABEL: _clearupper16xi8b: 800; AVX: # %bb.0: 801; AVX-NEXT: pushq %rbx 802; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 803; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %r9 804; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rcx 805; AVX-NEXT: movq %r9, %r8 806; AVX-NEXT: shrq $56, %r8 807; AVX-NEXT: andl $15, %r8d 808; AVX-NEXT: movq %r9, %r10 809; AVX-NEXT: shrq $48, %r10 810; AVX-NEXT: andl $15, %r10d 811; AVX-NEXT: movq %rcx, %rdx 812; AVX-NEXT: shldq $24, %r9, %rdx 813; AVX-NEXT: andl $15, %edx 814; AVX-NEXT: movq %r9, %r11 815; AVX-NEXT: shrq $32, %r11 816; AVX-NEXT: andl $15, %r11d 817; AVX-NEXT: movq %rcx, %rdi 818; AVX-NEXT: shrq $56, %rdi 819; AVX-NEXT: andl $15, %edi 820; AVX-NEXT: movq %rcx, %rsi 821; AVX-NEXT: shrq $48, %rsi 822; AVX-NEXT: andl $15, %esi 823; AVX-NEXT: movq %rcx, %rax 824; AVX-NEXT: shrq $40, %rax 825; AVX-NEXT: andl $15, %eax 826; AVX-NEXT: movq %rcx, %rbx 827; AVX-NEXT: shrq $32, %rbx 828; AVX-NEXT: andl $15, %ebx 829; AVX-NEXT: shlq $32, %rbx 830; AVX-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 831; AVX-NEXT: orq %rbx, %rcx 832; AVX-NEXT: shlq $40, %rax 833; AVX-NEXT: orq %rcx, %rax 834; AVX-NEXT: shlq $48, %rsi 835; AVX-NEXT: orq %rax, %rsi 836; AVX-NEXT: shlq $56, %rdi 837; AVX-NEXT: orq %rsi, %rdi 838; AVX-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) 839; AVX-NEXT: shlq $32, %r11 840; AVX-NEXT: andl $252645135, %r9d # imm = 0xF0F0F0F 841; AVX-NEXT: orq %r11, %r9 842; AVX-NEXT: shlq $40, %rdx 843; AVX-NEXT: orq %r9, %rdx 844; AVX-NEXT: shlq $48, %r10 845; AVX-NEXT: orq %rdx, %r10 846; AVX-NEXT: shlq $56, %r8 847; AVX-NEXT: orq %r10, %r8 848; AVX-NEXT: movq %r8, -{{[0-9]+}}(%rsp) 849; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 850; AVX-NEXT: popq %rbx 851; AVX-NEXT: retq 852 %x4 = bitcast <16 x i8> %0 to <32 x i4> 853 %r0 = insertelement <32 x i4> %x4, i4 zeroinitializer, i32 1 854 %r1 = insertelement <32 x i4> %r0, i4 zeroinitializer, i32 3 855 %r2 = insertelement <32 x i4> %r1, i4 zeroinitializer, i32 5 856 %r3 = insertelement <32 x i4> %r2, i4 zeroinitializer, i32 7 857 %r4 = insertelement <32 x i4> %r3, i4 zeroinitializer, i32 9 858 %r5 = insertelement <32 x i4> %r4, i4 zeroinitializer, i32 11 859 %r6 = insertelement <32 x i4> %r5, i4 zeroinitializer, i32 13 860 %r7 = insertelement <32 x i4> %r6, i4 zeroinitializer, i32 15 861 %r8 = insertelement <32 x i4> %r7, i4 zeroinitializer, i32 17 862 %r9 = insertelement <32 x i4> %r8, i4 zeroinitializer, i32 19 863 %r10 = insertelement <32 x i4> %r9, i4 zeroinitializer, i32 21 864 %r11 = insertelement <32 x i4> %r10, i4 zeroinitializer, i32 23 865 %r12 = insertelement <32 x i4> %r11, i4 zeroinitializer, i32 25 866 %r13 = insertelement <32 x i4> %r12, i4 zeroinitializer, i32 27 867 %r14 = insertelement <32 x i4> %r13, i4 zeroinitializer, i32 29 868 %r15 = insertelement <32 x i4> %r14, i4 zeroinitializer, i32 31 869 %r = bitcast <32 x i4> %r15 to <16 x i8> 870 ret <16 x i8> %r 871} 872 873define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind { 874; SSE2-LABEL: _clearupper32xi8b: 875; SSE2: # %bb.0: 876; SSE2-NEXT: pushq %rbx 877; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 878; SSE2-NEXT: movq %xmm2, %r10 879; SSE2-NEXT: movq %r10, %r8 880; SSE2-NEXT: shrq $56, %r8 881; SSE2-NEXT: andl $15, %r8d 882; SSE2-NEXT: movq %r10, %r9 883; SSE2-NEXT: shrq $48, %r9 884; SSE2-NEXT: andl $15, %r9d 885; SSE2-NEXT: movq %r10, %rsi 886; SSE2-NEXT: shrq $40, %rsi 887; SSE2-NEXT: andl $15, %esi 888; SSE2-NEXT: movq %r10, %r11 889; SSE2-NEXT: shrq $32, %r11 890; SSE2-NEXT: andl $15, %r11d 891; SSE2-NEXT: movq %xmm0, %rax 892; SSE2-NEXT: movq %rax, %rdx 893; SSE2-NEXT: shrq $56, %rdx 894; SSE2-NEXT: andl $15, %edx 895; SSE2-NEXT: movq %rax, %rcx 896; SSE2-NEXT: shrq $48, %rcx 897; SSE2-NEXT: andl $15, %ecx 898; SSE2-NEXT: movq %rax, %rdi 899; SSE2-NEXT: shrq $40, %rdi 900; SSE2-NEXT: andl $15, %edi 901; SSE2-NEXT: movq %rax, %rbx 902; SSE2-NEXT: shrq $32, %rbx 903; SSE2-NEXT: andl $15, %ebx 904; SSE2-NEXT: shlq $32, %rbx 905; SSE2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 906; SSE2-NEXT: orq %rbx, %rax 907; SSE2-NEXT: shlq $40, %rdi 908; SSE2-NEXT: orq %rax, %rdi 909; SSE2-NEXT: shlq $48, %rcx 910; SSE2-NEXT: orq %rdi, %rcx 911; SSE2-NEXT: shlq $56, %rdx 912; SSE2-NEXT: orq %rcx, %rdx 913; SSE2-NEXT: shlq $32, %r11 914; SSE2-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F 915; SSE2-NEXT: orq %r11, %r10 916; SSE2-NEXT: shlq $40, %rsi 917; SSE2-NEXT: orq %r10, %rsi 918; SSE2-NEXT: shlq $48, %r9 919; SSE2-NEXT: orq %rsi, %r9 920; SSE2-NEXT: shlq $56, %r8 921; SSE2-NEXT: orq %r9, %r8 922; SSE2-NEXT: movq %rdx, %xmm0 923; SSE2-NEXT: movq %r8, %xmm2 924; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 925; SSE2-NEXT: popq %rbx 926; SSE2-NEXT: retq 927; 928; SSE42-LABEL: _clearupper32xi8b: 929; SSE42: # %bb.0: 930; SSE42-NEXT: pushq %rbx 931; SSE42-NEXT: pextrq $1, %xmm0, %r10 932; SSE42-NEXT: movq %r10, %r8 933; SSE42-NEXT: shrq $56, %r8 934; SSE42-NEXT: andl $15, %r8d 935; SSE42-NEXT: movq %r10, %r9 936; SSE42-NEXT: shrq $48, %r9 937; SSE42-NEXT: andl $15, %r9d 938; SSE42-NEXT: movq %r10, %rsi 939; SSE42-NEXT: shrq $40, %rsi 940; SSE42-NEXT: andl $15, %esi 941; SSE42-NEXT: movq %r10, %r11 942; SSE42-NEXT: shrq $32, %r11 943; SSE42-NEXT: andl $15, %r11d 944; SSE42-NEXT: movq %xmm0, %rax 945; SSE42-NEXT: movq %rax, %rdx 946; SSE42-NEXT: shrq $56, %rdx 947; SSE42-NEXT: andl $15, %edx 948; SSE42-NEXT: movq %rax, %rcx 949; SSE42-NEXT: shrq $48, %rcx 950; SSE42-NEXT: andl $15, %ecx 951; SSE42-NEXT: movq %rax, %rdi 952; SSE42-NEXT: shrq $40, %rdi 953; SSE42-NEXT: andl $15, %edi 954; SSE42-NEXT: movq %rax, %rbx 955; SSE42-NEXT: shrq $32, %rbx 956; SSE42-NEXT: andl $15, %ebx 957; SSE42-NEXT: shlq $32, %rbx 958; SSE42-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 959; SSE42-NEXT: orq %rbx, %rax 960; SSE42-NEXT: shlq $40, %rdi 961; SSE42-NEXT: orq %rax, %rdi 962; SSE42-NEXT: shlq $48, %rcx 963; SSE42-NEXT: orq %rdi, %rcx 964; SSE42-NEXT: shlq $56, %rdx 965; SSE42-NEXT: orq %rcx, %rdx 966; SSE42-NEXT: shlq $32, %r11 967; SSE42-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F 968; SSE42-NEXT: orq %r11, %r10 969; SSE42-NEXT: shlq $40, %rsi 970; SSE42-NEXT: orq %r10, %rsi 971; SSE42-NEXT: shlq $48, %r9 972; SSE42-NEXT: orq %rsi, %r9 973; SSE42-NEXT: shlq $56, %r8 974; SSE42-NEXT: orq %r9, %r8 975; SSE42-NEXT: movq %r8, %xmm2 976; SSE42-NEXT: movq %rdx, %xmm0 977; SSE42-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 978; SSE42-NEXT: popq %rbx 979; SSE42-NEXT: retq 980; 981; AVX1-LABEL: _clearupper32xi8b: 982; AVX1: # %bb.0: 983; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 984; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax 985; AVX1-NEXT: movq %rax, %r8 986; AVX1-NEXT: movq %rax, %rdx 987; AVX1-NEXT: movq %rax, %rsi 988; AVX1-NEXT: movq %rax, %rdi 989; AVX1-NEXT: movq %rax, %rcx 990; AVX1-NEXT: shrq $32, %rcx 991; AVX1-NEXT: andl $15, %ecx 992; AVX1-NEXT: shlq $32, %rcx 993; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 994; AVX1-NEXT: orq %rcx, %rax 995; AVX1-NEXT: shrq $40, %rdi 996; AVX1-NEXT: andl $15, %edi 997; AVX1-NEXT: shlq $40, %rdi 998; AVX1-NEXT: orq %rax, %rdi 999; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1000; AVX1-NEXT: shrq $48, %rsi 1001; AVX1-NEXT: andl $15, %esi 1002; AVX1-NEXT: shlq $48, %rsi 1003; AVX1-NEXT: orq %rdi, %rsi 1004; AVX1-NEXT: movq %rax, %rcx 1005; AVX1-NEXT: shrq $56, %rdx 1006; AVX1-NEXT: andl $15, %edx 1007; AVX1-NEXT: shlq $56, %rdx 1008; AVX1-NEXT: orq %rsi, %rdx 1009; AVX1-NEXT: movq %rax, %rsi 1010; AVX1-NEXT: shldq $24, %rax, %r8 1011; AVX1-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) 1012; AVX1-NEXT: movq %rax, %rdx 1013; AVX1-NEXT: shrq $32, %rdx 1014; AVX1-NEXT: andl $15, %edx 1015; AVX1-NEXT: shlq $32, %rdx 1016; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 1017; AVX1-NEXT: orq %rdx, %rax 1018; AVX1-NEXT: andl $15, %r8d 1019; AVX1-NEXT: shlq $40, %r8 1020; AVX1-NEXT: orq %rax, %r8 1021; AVX1-NEXT: shrq $48, %rsi 1022; AVX1-NEXT: andl $15, %esi 1023; AVX1-NEXT: shlq $48, %rsi 1024; AVX1-NEXT: orq %r8, %rsi 1025; AVX1-NEXT: shrq $56, %rcx 1026; AVX1-NEXT: andl $15, %ecx 1027; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1028; AVX1-NEXT: shlq $56, %rcx 1029; AVX1-NEXT: orq %rsi, %rcx 1030; AVX1-NEXT: vmovq %xmm0, %rax 1031; AVX1-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) 1032; AVX1-NEXT: movl %eax, %ecx 1033; AVX1-NEXT: shrl $8, %ecx 1034; AVX1-NEXT: vmovd %eax, %xmm1 1035; AVX1-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1 1036; AVX1-NEXT: movl %eax, %ecx 1037; AVX1-NEXT: shrl $16, %ecx 1038; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 1039; AVX1-NEXT: movl %eax, %ecx 1040; AVX1-NEXT: shrl $24, %ecx 1041; AVX1-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 1042; AVX1-NEXT: movq %rax, %rcx 1043; AVX1-NEXT: shrq $32, %rcx 1044; AVX1-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 1045; AVX1-NEXT: movq %rax, %rcx 1046; AVX1-NEXT: shrq $40, %rcx 1047; AVX1-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1 1048; AVX1-NEXT: movq %rax, %rcx 1049; AVX1-NEXT: shrq $48, %rcx 1050; AVX1-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1 1051; AVX1-NEXT: vpextrq $1, %xmm0, %rcx 1052; AVX1-NEXT: shrq $56, %rax 1053; AVX1-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0 1054; AVX1-NEXT: movl %ecx, %eax 1055; AVX1-NEXT: shrl $8, %eax 1056; AVX1-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 1057; AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 1058; AVX1-NEXT: movl %ecx, %eax 1059; AVX1-NEXT: shrl $16, %eax 1060; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1061; AVX1-NEXT: movl %ecx, %eax 1062; AVX1-NEXT: shrl $24, %eax 1063; AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 1064; AVX1-NEXT: movq %rcx, %rax 1065; AVX1-NEXT: shrq $32, %rax 1066; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1067; AVX1-NEXT: movq %rcx, %rax 1068; AVX1-NEXT: shrq $40, %rax 1069; AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 1070; AVX1-NEXT: movq %rcx, %rax 1071; AVX1-NEXT: shrq $48, %rax 1072; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1073; AVX1-NEXT: shrq $56, %rcx 1074; AVX1-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0 1075; AVX1-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm1 1076; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1077; AVX1-NEXT: retq 1078; 1079; AVX2-LABEL: _clearupper32xi8b: 1080; AVX2: # %bb.0: 1081; AVX2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) 1082; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1083; AVX2-NEXT: movq %rax, %r8 1084; AVX2-NEXT: movq %rax, %rdx 1085; AVX2-NEXT: movq %rax, %rsi 1086; AVX2-NEXT: movq %rax, %rdi 1087; AVX2-NEXT: movq %rax, %rcx 1088; AVX2-NEXT: shrq $32, %rcx 1089; AVX2-NEXT: andl $15, %ecx 1090; AVX2-NEXT: shlq $32, %rcx 1091; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 1092; AVX2-NEXT: orq %rcx, %rax 1093; AVX2-NEXT: shrq $40, %rdi 1094; AVX2-NEXT: andl $15, %edi 1095; AVX2-NEXT: shlq $40, %rdi 1096; AVX2-NEXT: orq %rax, %rdi 1097; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1098; AVX2-NEXT: shrq $48, %rsi 1099; AVX2-NEXT: andl $15, %esi 1100; AVX2-NEXT: shlq $48, %rsi 1101; AVX2-NEXT: orq %rdi, %rsi 1102; AVX2-NEXT: movq %rax, %rcx 1103; AVX2-NEXT: shrq $56, %rdx 1104; AVX2-NEXT: andl $15, %edx 1105; AVX2-NEXT: shlq $56, %rdx 1106; AVX2-NEXT: orq %rsi, %rdx 1107; AVX2-NEXT: movq %rax, %rsi 1108; AVX2-NEXT: shldq $24, %rax, %r8 1109; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) 1110; AVX2-NEXT: movq %rax, %rdx 1111; AVX2-NEXT: shrq $32, %rdx 1112; AVX2-NEXT: andl $15, %edx 1113; AVX2-NEXT: shlq $32, %rdx 1114; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 1115; AVX2-NEXT: orq %rdx, %rax 1116; AVX2-NEXT: andl $15, %r8d 1117; AVX2-NEXT: shlq $40, %r8 1118; AVX2-NEXT: orq %rax, %r8 1119; AVX2-NEXT: shrq $48, %rsi 1120; AVX2-NEXT: andl $15, %esi 1121; AVX2-NEXT: shlq $48, %rsi 1122; AVX2-NEXT: orq %r8, %rsi 1123; AVX2-NEXT: shrq $56, %rcx 1124; AVX2-NEXT: andl $15, %ecx 1125; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1126; AVX2-NEXT: shlq $56, %rcx 1127; AVX2-NEXT: orq %rsi, %rcx 1128; AVX2-NEXT: vmovq %xmm0, %rax 1129; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) 1130; AVX2-NEXT: movl %eax, %ecx 1131; AVX2-NEXT: shrl $8, %ecx 1132; AVX2-NEXT: vmovd %eax, %xmm1 1133; AVX2-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1 1134; AVX2-NEXT: movl %eax, %ecx 1135; AVX2-NEXT: shrl $16, %ecx 1136; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 1137; AVX2-NEXT: movl %eax, %ecx 1138; AVX2-NEXT: shrl $24, %ecx 1139; AVX2-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 1140; AVX2-NEXT: movq %rax, %rcx 1141; AVX2-NEXT: shrq $32, %rcx 1142; AVX2-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 1143; AVX2-NEXT: movq %rax, %rcx 1144; AVX2-NEXT: shrq $40, %rcx 1145; AVX2-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1 1146; AVX2-NEXT: movq %rax, %rcx 1147; AVX2-NEXT: shrq $48, %rcx 1148; AVX2-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1 1149; AVX2-NEXT: vpextrq $1, %xmm0, %rcx 1150; AVX2-NEXT: shrq $56, %rax 1151; AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0 1152; AVX2-NEXT: movl %ecx, %eax 1153; AVX2-NEXT: shrl $8, %eax 1154; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 1155; AVX2-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 1156; AVX2-NEXT: movl %ecx, %eax 1157; AVX2-NEXT: shrl $16, %eax 1158; AVX2-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 1159; AVX2-NEXT: movl %ecx, %eax 1160; AVX2-NEXT: shrl $24, %eax 1161; AVX2-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 1162; AVX2-NEXT: movq %rcx, %rax 1163; AVX2-NEXT: shrq $32, %rax 1164; AVX2-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1165; AVX2-NEXT: movq %rcx, %rax 1166; AVX2-NEXT: shrq $40, %rax 1167; AVX2-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 1168; AVX2-NEXT: movq %rcx, %rax 1169; AVX2-NEXT: shrq $48, %rax 1170; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 1171; AVX2-NEXT: shrq $56, %rcx 1172; AVX2-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0 1173; AVX2-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm1 1174; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1175; AVX2-NEXT: retq 1176 %x4 = bitcast <32 x i8> %0 to <64 x i4> 1177 %r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1 1178 %r1 = insertelement <64 x i4> %r0, i4 zeroinitializer, i32 3 1179 %r2 = insertelement <64 x i4> %r1, i4 zeroinitializer, i32 5 1180 %r3 = insertelement <64 x i4> %r2, i4 zeroinitializer, i32 7 1181 %r4 = insertelement <64 x i4> %r3, i4 zeroinitializer, i32 9 1182 %r5 = insertelement <64 x i4> %r4, i4 zeroinitializer, i32 11 1183 %r6 = insertelement <64 x i4> %r5, i4 zeroinitializer, i32 13 1184 %r7 = insertelement <64 x i4> %r6, i4 zeroinitializer, i32 15 1185 %r8 = insertelement <64 x i4> %r7, i4 zeroinitializer, i32 17 1186 %r9 = insertelement <64 x i4> %r8, i4 zeroinitializer, i32 19 1187 %r10 = insertelement <64 x i4> %r9, i4 zeroinitializer, i32 21 1188 %r11 = insertelement <64 x i4> %r10, i4 zeroinitializer, i32 23 1189 %r12 = insertelement <64 x i4> %r11, i4 zeroinitializer, i32 25 1190 %r13 = insertelement <64 x i4> %r12, i4 zeroinitializer, i32 27 1191 %r14 = insertelement <64 x i4> %r13, i4 zeroinitializer, i32 29 1192 %r15 = insertelement <64 x i4> %r14, i4 zeroinitializer, i32 31 1193 %r16 = insertelement <64 x i4> %r15, i4 zeroinitializer, i32 33 1194 %r17 = insertelement <64 x i4> %r16, i4 zeroinitializer, i32 35 1195 %r18 = insertelement <64 x i4> %r17, i4 zeroinitializer, i32 37 1196 %r19 = insertelement <64 x i4> %r18, i4 zeroinitializer, i32 39 1197 %r20 = insertelement <64 x i4> %r19, i4 zeroinitializer, i32 41 1198 %r21 = insertelement <64 x i4> %r20, i4 zeroinitializer, i32 43 1199 %r22 = insertelement <64 x i4> %r21, i4 zeroinitializer, i32 45 1200 %r23 = insertelement <64 x i4> %r22, i4 zeroinitializer, i32 47 1201 %r24 = insertelement <64 x i4> %r23, i4 zeroinitializer, i32 49 1202 %r25 = insertelement <64 x i4> %r24, i4 zeroinitializer, i32 51 1203 %r26 = insertelement <64 x i4> %r25, i4 zeroinitializer, i32 53 1204 %r27 = insertelement <64 x i4> %r26, i4 zeroinitializer, i32 55 1205 %r28 = insertelement <64 x i4> %r27, i4 zeroinitializer, i32 57 1206 %r29 = insertelement <64 x i4> %r28, i4 zeroinitializer, i32 59 1207 %r30 = insertelement <64 x i4> %r29, i4 zeroinitializer, i32 61 1208 %r31 = insertelement <64 x i4> %r30, i4 zeroinitializer, i32 63 1209 %r = bitcast <64 x i4> %r15 to <32 x i8> 1210 ret <32 x i8> %r 1211} 1212 1213define <2 x i64> @_clearupper2xi64c(<2 x i64>) nounwind { 1214; SSE2-LABEL: _clearupper2xi64c: 1215; SSE2: # %bb.0: 1216; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1217; SSE2-NEXT: retq 1218; 1219; SSE42-LABEL: _clearupper2xi64c: 1220; SSE42: # %bb.0: 1221; SSE42-NEXT: xorps %xmm1, %xmm1 1222; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 1223; SSE42-NEXT: retq 1224; 1225; AVX-LABEL: _clearupper2xi64c: 1226; AVX: # %bb.0: 1227; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 1228; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 1229; AVX-NEXT: retq 1230 %r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0 1231 ret <2 x i64> %r 1232} 1233 1234define <4 x i64> @_clearupper4xi64c(<4 x i64>) nounwind { 1235; SSE2-LABEL: _clearupper4xi64c: 1236; SSE2: # %bb.0: 1237; SSE2-NEXT: movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0] 1238; SSE2-NEXT: andps %xmm2, %xmm0 1239; SSE2-NEXT: andps %xmm2, %xmm1 1240; SSE2-NEXT: retq 1241; 1242; SSE42-LABEL: _clearupper4xi64c: 1243; SSE42: # %bb.0: 1244; SSE42-NEXT: xorps %xmm2, %xmm2 1245; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] 1246; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 1247; SSE42-NEXT: retq 1248; 1249; AVX-LABEL: _clearupper4xi64c: 1250; AVX: # %bb.0: 1251; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 1252; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1253; AVX-NEXT: retq 1254 %r = and <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>, %0 1255 ret <4 x i64> %r 1256} 1257 1258define <4 x i32> @_clearupper4xi32c(<4 x i32>) nounwind { 1259; SSE2-LABEL: _clearupper4xi32c: 1260; SSE2: # %bb.0: 1261; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1262; SSE2-NEXT: retq 1263; 1264; SSE42-LABEL: _clearupper4xi32c: 1265; SSE42: # %bb.0: 1266; SSE42-NEXT: pxor %xmm1, %xmm1 1267; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 1268; SSE42-NEXT: retq 1269; 1270; AVX-LABEL: _clearupper4xi32c: 1271; AVX: # %bb.0: 1272; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 1273; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 1274; AVX-NEXT: retq 1275 %r = and <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, %0 1276 ret <4 x i32> %r 1277} 1278 1279define <8 x i32> @_clearupper8xi32c(<8 x i32>) nounwind { 1280; SSE2-LABEL: _clearupper8xi32c: 1281; SSE2: # %bb.0: 1282; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0] 1283; SSE2-NEXT: andps %xmm2, %xmm0 1284; SSE2-NEXT: andps %xmm2, %xmm1 1285; SSE2-NEXT: retq 1286; 1287; SSE42-LABEL: _clearupper8xi32c: 1288; SSE42: # %bb.0: 1289; SSE42-NEXT: pxor %xmm2, %xmm2 1290; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] 1291; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] 1292; SSE42-NEXT: retq 1293; 1294; AVX1-LABEL: _clearupper8xi32c: 1295; AVX1: # %bb.0: 1296; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1297; AVX1-NEXT: retq 1298; 1299; AVX2-LABEL: _clearupper8xi32c: 1300; AVX2: # %bb.0: 1301; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1302; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 1303; AVX2-NEXT: retq 1304 %r = and <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>, %0 1305 ret <8 x i32> %r 1306} 1307 1308define <8 x i16> @_clearupper8xi16c(<8 x i16>) nounwind { 1309; SSE-LABEL: _clearupper8xi16c: 1310; SSE: # %bb.0: 1311; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1312; SSE-NEXT: retq 1313; 1314; AVX-LABEL: _clearupper8xi16c: 1315; AVX: # %bb.0: 1316; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1317; AVX-NEXT: retq 1318 %r = and <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0 1319 ret <8 x i16> %r 1320} 1321 1322define <16 x i16> @_clearupper16xi16c(<16 x i16>) nounwind { 1323; SSE-LABEL: _clearupper16xi16c: 1324; SSE: # %bb.0: 1325; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 1326; SSE-NEXT: andps %xmm2, %xmm0 1327; SSE-NEXT: andps %xmm2, %xmm1 1328; SSE-NEXT: retq 1329; 1330; AVX-LABEL: _clearupper16xi16c: 1331; AVX: # %bb.0: 1332; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1333; AVX-NEXT: retq 1334 %r = and <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0 1335 ret <16 x i16> %r 1336} 1337 1338define <16 x i8> @_clearupper16xi8c(<16 x i8>) nounwind { 1339; SSE-LABEL: _clearupper16xi8c: 1340; SSE: # %bb.0: 1341; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1342; SSE-NEXT: retq 1343; 1344; AVX-LABEL: _clearupper16xi8c: 1345; AVX: # %bb.0: 1346; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1347; AVX-NEXT: retq 1348 %r = and <16 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0 1349 ret <16 x i8> %r 1350} 1351 1352define <32 x i8> @_clearupper32xi8c(<32 x i8>) nounwind { 1353; SSE-LABEL: _clearupper32xi8c: 1354; SSE: # %bb.0: 1355; SSE-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1356; SSE-NEXT: andps %xmm2, %xmm0 1357; SSE-NEXT: andps %xmm2, %xmm1 1358; SSE-NEXT: retq 1359; 1360; AVX-LABEL: _clearupper32xi8c: 1361; AVX: # %bb.0: 1362; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1363; AVX-NEXT: retq 1364 %r = and <32 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0 1365 ret <32 x i8> %r 1366} 1367