1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sha < %s | FileCheck %s 3 4target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 5target triple = "x86_64-unknown-unknown" 6 7; Stack reload folding tests. 8; 9; By including a nop call with sideeffects we can force a partial register spill of the 10; relevant registers and check that the reload is correctly folded into the instruction. 11 12define <4 x i32> @stack_fold_sha1msg1(<4 x i32> %a0, <4 x i32> %a1) { 13; CHECK-LABEL: stack_fold_sha1msg1: 14; CHECK: # %bb.0: 15; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 16; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 17; CHECK-NEXT: #APP 18; CHECK-NEXT: nop 19; CHECK-NEXT: #NO_APP 20; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 21; CHECK-NEXT: sha1msg1 {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 22; CHECK-NEXT: retq 23 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 24 %2 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a0, <4 x i32> %a1) 25 ret <4 x i32> %2 26} 27declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone 28 29define <4 x i32> @stack_fold_sha1msg2(<4 x i32> %a0, <4 x i32> %a1) { 30; CHECK-LABEL: stack_fold_sha1msg2: 31; CHECK: # %bb.0: 32; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 33; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 34; CHECK-NEXT: #APP 35; CHECK-NEXT: nop 36; CHECK-NEXT: #NO_APP 37; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 38; CHECK-NEXT: sha1msg2 {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 39; CHECK-NEXT: retq 40 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 41 %2 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a0, <4 x i32> %a1) 42 ret <4 x i32> %2 43} 44declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone 45 46define <4 x i32> @stack_fold_sha1nexte(<4 x i32> %a0, <4 x i32> %a1) { 47; CHECK-LABEL: stack_fold_sha1nexte: 48; CHECK: # %bb.0: 49; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 50; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 51; CHECK-NEXT: #APP 52; CHECK-NEXT: nop 53; CHECK-NEXT: #NO_APP 54; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 55; CHECK-NEXT: sha1nexte {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 56; CHECK-NEXT: retq 57 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 58 %2 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a0, <4 x i32> %a1) 59 ret <4 x i32> %2 60} 61declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone 62 63define <4 x i32> @stack_fold_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1) { 64; CHECK-LABEL: stack_fold_sha1rnds4: 65; CHECK: # %bb.0: 66; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 67; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 68; CHECK-NEXT: #APP 69; CHECK-NEXT: nop 70; CHECK-NEXT: #NO_APP 71; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 72; CHECK-NEXT: sha1rnds4 $3, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 73; CHECK-NEXT: retq 74 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 75 %2 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, i8 3) 76 ret <4 x i32> %2 77} 78declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone 79 80define <4 x i32> @stack_fold_sha256msg1(<4 x i32> %a0, <4 x i32> %a1) { 81; CHECK-LABEL: stack_fold_sha256msg1: 82; CHECK: # %bb.0: 83; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 84; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 85; CHECK-NEXT: #APP 86; CHECK-NEXT: nop 87; CHECK-NEXT: #NO_APP 88; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 89; CHECK-NEXT: sha256msg1 {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 90; CHECK-NEXT: retq 91 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 92 %2 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a0, <4 x i32> %a1) 93 ret <4 x i32> %2 94} 95declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone 96 97define <4 x i32> @stack_fold_sha256msg2(<4 x i32> %a0, <4 x i32> %a1) { 98; CHECK-LABEL: stack_fold_sha256msg2: 99; CHECK: # %bb.0: 100; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 101; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 102; CHECK-NEXT: #APP 103; CHECK-NEXT: nop 104; CHECK-NEXT: #NO_APP 105; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 106; CHECK-NEXT: sha256msg2 {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 107; CHECK-NEXT: retq 108 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 109 %2 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a0, <4 x i32> %a1) 110 ret <4 x i32> %2 111} 112declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone 113 114define <4 x i32> @stack_fold_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { 115; CHECK-LABEL: stack_fold_sha256rnds2: 116; CHECK: # %bb.0: 117; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 118; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 119; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 120; CHECK-NEXT: #APP 121; CHECK-NEXT: nop 122; CHECK-NEXT: #NO_APP 123; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 124; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 125; CHECK-NEXT: sha256rnds2 %xmm0, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 126; CHECK-NEXT: movaps %xmm1, %xmm0 127; CHECK-NEXT: retq 128 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 129 %2 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) 130 ret <4 x i32> %2 131} 132declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone 133