; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+kl,+widekl | FileCheck %s

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/X86/keylocker-builtins.c

; Each function below wraps exactly one Key Locker intrinsic. The IR statement
; order matters: the expected assembly is matched instruction by instruction, so
; loads/stores must stay in the order shown.
;
; NOTE(review): several call sites below carry attribute group #1, but no
; "attributes #1 = { ... }" definition is visible in this file -- confirm
; whether that omission is intentional.

; loadiwkey: forwards the three key vectors and %ctl to the intrinsic, which
; returns nothing. The expected assembly copies %ctl (edi) into eax and names
; xmm2/xmm1 as the explicit operands.
define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) {
; CHECK-LABEL: test_loadiwkey:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: loadiwkey %xmm2, %xmm1
; CHECK-NEXT: retq
entry:
  tail call void @llvm.x86.loadiwkey(<2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi, i32 %ctl)
  ret void
}

; encodekey128: the intrinsic returns an i32 plus six <2 x i64> values.
; Elements 1..6 are stored to %h at byte offsets 0, 16, ..., 80 with align 1
; (matched as unaligned movups stores from xmm0-xmm2 and xmm4-xmm6); element 0
; is the function's return value.
define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, i8* nocapture %h) {
; CHECK-LABEL: test_encodekey128_u32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: encodekey128 %edi, %eax
; CHECK-NEXT: movups %xmm0, (%rsi)
; CHECK-NEXT: movups %xmm1, 16(%rsi)
; CHECK-NEXT: movups %xmm2, 32(%rsi)
; CHECK-NEXT: movups %xmm4, 48(%rsi)
; CHECK-NEXT: movups %xmm5, 64(%rsi)
; CHECK-NEXT: movups %xmm6, 80(%rsi)
; CHECK-NEXT: retq
entry:
  %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key)
  %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  %2 = bitcast i8* %h to <2 x i64>*
  store <2 x i64> %1, <2 x i64>* %2, align 1
  %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  %4 = getelementptr i8, i8* %h, i64 16
  %5 = bitcast i8* %4 to <2 x i64>*
  store <2 x i64> %3, <2 x i64>* %5, align 1
  %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  %7 = getelementptr i8, i8* %h, i64 32
  %8 = bitcast i8* %7 to <2 x i64>*
  store <2 x i64> %6, <2 x i64>* %8, align 1
  %9 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  %10 = getelementptr i8, i8* %h, i64 48
  %11 = bitcast i8* %10 to <2 x i64>*
  store <2 x i64> %9, <2 x i64>* %11, align 1
  %12 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  %13 = getelementptr i8, i8* %h, i64 64
  %14 = bitcast i8* %13 to <2 x i64>*
  store <2 x i64> %12, <2 x i64>* %14, align 1
  %15 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  %16 = getelementptr i8, i8* %h, i64 80
  %17 = bitcast i8* %16 to <2 x i64>*
  store <2 x i64> %15, <2 x i64>* %17, align 1
  %18 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i32 %18
}

; encodekey256: same shape as the 128-bit variant, but the key is passed as two
; vectors and the intrinsic returns seven <2 x i64> values. Elements 1..7 are
; stored to %h at byte offsets 0, 16, ..., 96 with align 1; element 0 is
; returned.
define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, i8* nocapture %h) {
; CHECK-LABEL: test_encodekey256_u32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: encodekey256 %edi, %eax
; CHECK-NEXT: movups %xmm0, (%rsi)
; CHECK-NEXT: movups %xmm1, 16(%rsi)
; CHECK-NEXT: movups %xmm2, 32(%rsi)
; CHECK-NEXT: movups %xmm3, 48(%rsi)
; CHECK-NEXT: movups %xmm4, 64(%rsi)
; CHECK-NEXT: movups %xmm5, 80(%rsi)
; CHECK-NEXT: movups %xmm6, 96(%rsi)
; CHECK-NEXT: retq
entry:
  %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi)
  %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  %2 = bitcast i8* %h to <2 x i64>*
  store <2 x i64> %1, <2 x i64>* %2, align 1
  %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  %4 = getelementptr i8, i8* %h, i64 16
  %5 = bitcast i8* %4 to <2 x i64>*
  store <2 x i64> %3, <2 x i64>* %5, align 1
  %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  %7 = getelementptr i8, i8* %h, i64 32
  %8 = bitcast i8* %7 to <2 x i64>*
  store <2 x i64> %6, <2 x i64>* %8, align 1
  %9 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  %10 = getelementptr i8, i8* %h, i64 48
  %11 = bitcast i8* %10 to <2 x i64>*
  store <2 x i64> %9, <2 x i64>* %11, align 1
  %12 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  %13 = getelementptr i8, i8* %h, i64 64
  %14 = bitcast i8* %13 to <2 x i64>*
  store <2 x i64> %12, <2 x i64>* %14, align 1
  %15 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  %16 = getelementptr i8, i8* %h, i64 80
  %17 = bitcast i8* %16 to <2 x i64>*
  store <2 x i64> %15, <2 x i64>* %17, align 1
  %18 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
  %19 = getelementptr i8, i8* %h, i64 96
  %20 = bitcast i8* %19 to <2 x i64>*
  store <2 x i64> %18, <2 x i64>* %20, align 1
  %21 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i32 %21
}

; aesenc256kl: passes %idata and the pointer %h to the intrinsic, stores result
; element 1 to %odata (align 16, matched as movaps) and returns element 0 (i8).
; The expected assembly zeroes eax before the instruction and produces the i8
; with sete %al afterwards.
define zeroext i8 @test_mm_aesenc256kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) {
; CHECK-LABEL: test_mm_aesenc256kl_u8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: aesenc256kl (%rsi), %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movaps %xmm0, (%rdi)
; CHECK-NEXT: retq
entry:
  %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %idata, i8* %h) #1
  %1 = extractvalue { i8, <2 x i64> } %0, 1
  store <2 x i64> %1, <2 x i64>* %odata, align 16
  %2 = extractvalue { i8, <2 x i64> } %0, 0
  ret i8 %2
}

; aesdec256kl: same structure as the aesenc256kl test, using the decrypt
; intrinsic.
define zeroext i8 @test_mm_aesdec256kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) {
; CHECK-LABEL: test_mm_aesdec256kl_u8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: aesdec256kl (%rsi), %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movaps %xmm0, (%rdi)
; CHECK-NEXT: retq
entry:
  %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %idata, i8* %h) #1
  %1 = extractvalue { i8, <2 x i64> } %0, 1
  store <2 x i64> %1, <2 x i64>* %odata, align 16
  %2 = extractvalue { i8, <2 x i64> } %0, 0
  ret i8 %2
}

; aesenc128kl: same structure as the 256-bit tests above, using the 128-bit
; encrypt intrinsic.
define zeroext i8 @test_mm_aesenc128kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) {
; CHECK-LABEL: test_mm_aesenc128kl_u8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: aesenc128kl (%rsi), %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movaps %xmm0, (%rdi)
; CHECK-NEXT: retq
entry:
  %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %idata, i8* %h) #1
  %1 = extractvalue { i8, <2 x i64> } %0, 1
  store <2 x i64> %1, <2 x i64>* %odata, align 16
  %2 = extractvalue { i8, <2 x i64> } %0, 0
  ret i8 %2
}

; aesdec128kl: same structure as the aesenc128kl test, using the decrypt
; intrinsic.
define zeroext i8 @test_mm_aesdec128kl_u8(<2 x i64>* %odata, <2 x i64> %idata, i8* %h) {
; CHECK-LABEL: test_mm_aesdec128kl_u8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: aesdec128kl (%rsi), %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movaps %xmm0, (%rdi)
; CHECK-NEXT: retq
entry:
  %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %idata, i8* %h) #1
  %1 = extractvalue { i8, <2 x i64> } %0, 1
  store <2 x i64> %1, <2 x i64>* %odata, align 16
  %2 = extractvalue { i8, <2 x i64> } %0, 0
  ret i8 %2
}

; aesencwide128kl: loads eight <2 x i64> values from %idata[0..7], passes them
; together with %h to the intrinsic, stores result elements 1..8 to
; %odata[0..7] and returns element 0 (i8). All loads and stores use align 16
; (matched as movaps); the eight vectors are expected in xmm0-xmm7 around the
; instruction.
define zeroext i8 @test__mm_aesencwide128kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) {
; CHECK-LABEL: test__mm_aesencwide128kl_u8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movaps (%rsi), %xmm0
; CHECK-NEXT: movaps 16(%rsi), %xmm1
; CHECK-NEXT: movaps 32(%rsi), %xmm2
; CHECK-NEXT: movaps 48(%rsi), %xmm3
; CHECK-NEXT: movaps 64(%rsi), %xmm4
; CHECK-NEXT: movaps 80(%rsi), %xmm5
; CHECK-NEXT: movaps 96(%rsi), %xmm6
; CHECK-NEXT: movaps 112(%rsi), %xmm7
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: aesencwide128kl (%rdx)
; CHECK-NEXT: sete %al
; CHECK-NEXT: movaps %xmm0, (%rdi)
; CHECK-NEXT: movaps %xmm1, 16(%rdi)
; CHECK-NEXT: movaps %xmm2, 32(%rdi)
; CHECK-NEXT: movaps %xmm3, 48(%rdi)
; CHECK-NEXT: movaps %xmm4, 64(%rdi)
; CHECK-NEXT: movaps %xmm5, 80(%rdi)
; CHECK-NEXT: movaps %xmm6, 96(%rdi)
; CHECK-NEXT: movaps %xmm7, 112(%rdi)
; CHECK-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %idata, align 16
  %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1
  %2 = load <2 x i64>, <2 x i64>* %1, align 16
  %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2
  %4 = load <2 x i64>, <2 x i64>* %3, align 16
  %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3
  %6 = load <2 x i64>, <2 x i64>* %5, align 16
  %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4
  %8 = load <2 x i64>, <2 x i64>* %7, align 16
  %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5
  %10 = load <2 x i64>, <2 x i64>* %9, align 16
  %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6
  %12 = load <2 x i64>, <2 x i64>* %11, align 16
  %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7
  %14 = load <2 x i64>, <2 x i64>* %13, align 16
  %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
  %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
  store <2 x i64> %16, <2 x i64>* %odata, align 16
  %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
  %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1
  store <2 x i64> %17, <2 x i64>* %18, align 16
  %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
  %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2
  store <2 x i64> %19, <2 x i64>* %20, align 16
  %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
  %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3
  store <2 x i64> %21, <2 x i64>* %22, align 16
  %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
  %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4
  store <2 x i64> %23, <2 x i64>* %24, align 16
  %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
  %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5
  store <2 x i64> %25, <2 x i64>* %26, align 16
  %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
  %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6
  store <2 x i64> %27, <2 x i64>* %28, align 16
  %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
  %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7
  store <2 x i64> %29, <2 x i64>* %30, align 16
  %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
  ret i8 %31
}

; aesdecwide128kl: same structure as the aesencwide128kl test, using the
; decrypt intrinsic.
define zeroext i8 @test__mm_aesdecwide128kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) {
; CHECK-LABEL: test__mm_aesdecwide128kl_u8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movaps (%rsi), %xmm0
; CHECK-NEXT: movaps 16(%rsi), %xmm1
; CHECK-NEXT: movaps 32(%rsi), %xmm2
; CHECK-NEXT: movaps 48(%rsi), %xmm3
; CHECK-NEXT: movaps 64(%rsi), %xmm4
; CHECK-NEXT: movaps 80(%rsi), %xmm5
; CHECK-NEXT: movaps 96(%rsi), %xmm6
; CHECK-NEXT: movaps 112(%rsi), %xmm7
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: aesdecwide128kl (%rdx)
; CHECK-NEXT: sete %al
; CHECK-NEXT: movaps %xmm0, (%rdi)
; CHECK-NEXT: movaps %xmm1, 16(%rdi)
; CHECK-NEXT: movaps %xmm2, 32(%rdi)
; CHECK-NEXT: movaps %xmm3, 48(%rdi)
; CHECK-NEXT: movaps %xmm4, 64(%rdi)
; CHECK-NEXT: movaps %xmm5, 80(%rdi)
; CHECK-NEXT: movaps %xmm6, 96(%rdi)
; CHECK-NEXT: movaps %xmm7, 112(%rdi)
; CHECK-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %idata, align 16
  %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1
  %2 = load <2 x i64>, <2 x i64>* %1, align 16
  %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2
  %4 = load <2 x i64>, <2 x i64>* %3, align 16
  %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3
  %6 = load <2 x i64>, <2 x i64>* %5, align 16
  %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4
  %8 = load <2 x i64>, <2 x i64>* %7, align 16
  %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5
  %10 = load <2 x i64>, <2 x i64>* %9, align 16
  %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6
  %12 = load <2 x i64>, <2 x i64>* %11, align 16
  %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7
  %14 = load <2 x i64>, <2 x i64>* %13, align 16
  %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
  %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
  store <2 x i64> %16, <2 x i64>* %odata, align 16
  %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
  %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1
  store <2 x i64> %17, <2 x i64>* %18, align 16
  %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
  %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2
  store <2 x i64> %19, <2 x i64>* %20, align 16
  %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
  %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3
  store <2 x i64> %21, <2 x i64>* %22, align 16
  %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
  %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4
  store <2 x i64> %23, <2 x i64>* %24, align 16
  %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
  %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5
  store <2 x i64> %25, <2 x i64>* %26, align 16
  %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
  %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6
  store <2 x i64> %27, <2 x i64>* %28, align 16
  %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
  %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7
  store <2 x i64> %29, <2 x i64>* %30, align 16
  %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
  ret i8 %31
}

; aesencwide256kl: same structure as the wide 128-bit tests, using the 256-bit
; encrypt intrinsic.
define zeroext i8 @test__mm_aesencwide256kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) {
; CHECK-LABEL: test__mm_aesencwide256kl_u8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movaps (%rsi), %xmm0
; CHECK-NEXT: movaps 16(%rsi), %xmm1
; CHECK-NEXT: movaps 32(%rsi), %xmm2
; CHECK-NEXT: movaps 48(%rsi), %xmm3
; CHECK-NEXT: movaps 64(%rsi), %xmm4
; CHECK-NEXT: movaps 80(%rsi), %xmm5
; CHECK-NEXT: movaps 96(%rsi), %xmm6
; CHECK-NEXT: movaps 112(%rsi), %xmm7
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: aesencwide256kl (%rdx)
; CHECK-NEXT: sete %al
; CHECK-NEXT: movaps %xmm0, (%rdi)
; CHECK-NEXT: movaps %xmm1, 16(%rdi)
; CHECK-NEXT: movaps %xmm2, 32(%rdi)
; CHECK-NEXT: movaps %xmm3, 48(%rdi)
; CHECK-NEXT: movaps %xmm4, 64(%rdi)
; CHECK-NEXT: movaps %xmm5, 80(%rdi)
; CHECK-NEXT: movaps %xmm6, 96(%rdi)
; CHECK-NEXT: movaps %xmm7, 112(%rdi)
; CHECK-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %idata, align 16
  %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1
  %2 = load <2 x i64>, <2 x i64>* %1, align 16
  %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2
  %4 = load <2 x i64>, <2 x i64>* %3, align 16
  %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3
  %6 = load <2 x i64>, <2 x i64>* %5, align 16
  %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4
  %8 = load <2 x i64>, <2 x i64>* %7, align 16
  %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5
  %10 = load <2 x i64>, <2 x i64>* %9, align 16
  %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6
  %12 = load <2 x i64>, <2 x i64>* %11, align 16
  %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7
  %14 = load <2 x i64>, <2 x i64>* %13, align 16
  %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
  %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
  store <2 x i64> %16, <2 x i64>* %odata, align 16
  %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
  %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1
  store <2 x i64> %17, <2 x i64>* %18, align 16
  %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
  %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2
  store <2 x i64> %19, <2 x i64>* %20, align 16
  %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
  %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3
  store <2 x i64> %21, <2 x i64>* %22, align 16
  %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
  %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4
  store <2 x i64> %23, <2 x i64>* %24, align 16
  %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
  %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5
  store <2 x i64> %25, <2 x i64>* %26, align 16
  %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
  %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6
  store <2 x i64> %27, <2 x i64>* %28, align 16
  %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
  %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7
  store <2 x i64> %29, <2 x i64>* %30, align 16
  %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
  ret i8 %31
}

; aesdecwide256kl: same structure as the aesencwide256kl test, using the
; decrypt intrinsic.
define zeroext i8 @test__mm_aesdecwide256kl_u8(<2 x i64>* %odata, <2 x i64>* %idata, i8* %h) {
; CHECK-LABEL: test__mm_aesdecwide256kl_u8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movaps (%rsi), %xmm0
; CHECK-NEXT: movaps 16(%rsi), %xmm1
; CHECK-NEXT: movaps 32(%rsi), %xmm2
; CHECK-NEXT: movaps 48(%rsi), %xmm3
; CHECK-NEXT: movaps 64(%rsi), %xmm4
; CHECK-NEXT: movaps 80(%rsi), %xmm5
; CHECK-NEXT: movaps 96(%rsi), %xmm6
; CHECK-NEXT: movaps 112(%rsi), %xmm7
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: aesdecwide256kl (%rdx)
; CHECK-NEXT: sete %al
; CHECK-NEXT: movaps %xmm0, (%rdi)
; CHECK-NEXT: movaps %xmm1, 16(%rdi)
; CHECK-NEXT: movaps %xmm2, 32(%rdi)
; CHECK-NEXT: movaps %xmm3, 48(%rdi)
; CHECK-NEXT: movaps %xmm4, 64(%rdi)
; CHECK-NEXT: movaps %xmm5, 80(%rdi)
; CHECK-NEXT: movaps %xmm6, 96(%rdi)
; CHECK-NEXT: movaps %xmm7, 112(%rdi)
; CHECK-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %idata, align 16
  %1 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 1
  %2 = load <2 x i64>, <2 x i64>* %1, align 16
  %3 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 2
  %4 = load <2 x i64>, <2 x i64>* %3, align 16
  %5 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 3
  %6 = load <2 x i64>, <2 x i64>* %5, align 16
  %7 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 4
  %8 = load <2 x i64>, <2 x i64>* %7, align 16
  %9 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 5
  %10 = load <2 x i64>, <2 x i64>* %9, align 16
  %11 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 6
  %12 = load <2 x i64>, <2 x i64>* %11, align 16
  %13 = getelementptr <2 x i64>, <2 x i64>* %idata, i64 7
  %14 = load <2 x i64>, <2 x i64>* %13, align 16
  %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
  %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
  store <2 x i64> %16, <2 x i64>* %odata, align 16
  %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
  %18 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 1
  store <2 x i64> %17, <2 x i64>* %18, align 16
  %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
  %20 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 2
  store <2 x i64> %19, <2 x i64>* %20, align 16
  %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
  %22 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 3
  store <2 x i64> %21, <2 x i64>* %22, align 16
  %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
  %24 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 4
  store <2 x i64> %23, <2 x i64>* %24, align 16
  %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
  %26 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 5
  store <2 x i64> %25, <2 x i64>* %26, align 16
  %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
  %28 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 6
  store <2 x i64> %27, <2 x i64>* %28, align 16
  %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
  %30 = getelementptr <2 x i64>, <2 x i64>* %odata, i64 7
  store <2 x i64> %29, <2 x i64>* %30, align 16
  %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
  ret i8 %31
}

; Intrinsic declarations. Each return struct's element 0 is the scalar status
; value the tests return (i32 for encodekey*, i8 for the aes* forms); the
; remaining elements are the <2 x i64> results the tests store to memory.
declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32)
declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>)
declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, i8*)
declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, i8*)
declare { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64>, i8*)
declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, i8*)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)