; Test 32-bit square root.
;
; The z10 run enables a scalar-only check prefix in addition to the default
; one; the z14 run is verified against the default prefix only.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s

declare float @llvm.sqrt.f32(float)
declare float @sqrtf(float)

; Check register square root.
; The intrinsic applied to a register argument is expected to become a single
; register-to-register SQEBR followed by the return.
define float @f1(float %val) {
; CHECK-LABEL: f1:
; CHECK: sqebr %f0, %f0
; CHECK: br %r14
  %res = call float @llvm.sqrt.f32(float %val)
  ret float %res
}

; Check the low end of the SQEB range.
; The load is expected to be folded into SQEB with a zero displacement.
define float @f2(float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: sqeb %f0, 0(%r2)
; CHECK: br %r14
  %val = load float, float *%ptr
  %res = call float @llvm.sqrt.f32(float %val)
  ret float %res
}

; Check the high end of the aligned SQEB range.
; Element 1023 of a float array is byte offset 4092, which the expected
; output still encodes directly as the displacement.
define float @f3(float *%base) {
; CHECK-LABEL: f3:
; CHECK: sqeb %f0, 4092(%r2)
; CHECK: br %r14
  %ptr = getelementptr float, float *%base, i64 1023
  %val = load float, float *%ptr
  %res = call float @llvm.sqrt.f32(float %val)
  ret float %res
}

; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element 1024 is byte offset 4096; the expected output adds it to the base
; register first and then uses a zero displacement.
define float @f4(float *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: sqeb %f0, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr float, float *%base, i64 1024
  %val = load float, float *%ptr
  %res = call float @llvm.sqrt.f32(float %val)
  ret float %res
}

; Check negative displacements, which also need separate address logic.
; Element -1 is byte offset -4, applied to the base register before the SQEB.
define float @f5(float *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -4
; CHECK: sqeb %f0, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr float, float *%base, i64 -1
  %val = load float, float *%ptr
  %res = call float @llvm.sqrt.f32(float %val)
  ret float %res
}

; Check that SQEB allows indices.
; The index is scaled by the float size (shift left by 2) and the constant
; 100-element offset is expected to appear as a 400-byte displacement.
define float @f6(float *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 2
; CHECK: sqeb %f0, 400(%r1,%r2)
; CHECK: br %r14
  %ptr1 = getelementptr float, float *%base, i64 %index
  %ptr2 = getelementptr float, float *%ptr1, i64 100
  %val = load float, float *%ptr2
  %res = call float @llvm.sqrt.f32(float %val)
  ret float %res
}

; Test a case where we spill the source of at least one SQEBR.  We want
; to use SQEB if possible.
;
; Seventeen values are loaded and every one of them is stored back after the
; square roots, so all of them stay live across the sqrt calls.  The folded
; reload is matched at stack offset 160 or 164, and only for the run that
; enables the scalar-only prefix (z10); the z14 run checks just the label and
; the return.
define void @f7(float *%ptr) {
; CHECK-LABEL: f7:
; CHECK-SCALAR: sqeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
; CHECK: br %r14
  %val0 = load volatile float, float *%ptr
  %val1 = load volatile float, float *%ptr
  %val2 = load volatile float, float *%ptr
  %val3 = load volatile float, float *%ptr
  %val4 = load volatile float, float *%ptr
  %val5 = load volatile float, float *%ptr
  %val6 = load volatile float, float *%ptr
  %val7 = load volatile float, float *%ptr
  %val8 = load volatile float, float *%ptr
  %val9 = load volatile float, float *%ptr
  %val10 = load volatile float, float *%ptr
  %val11 = load volatile float, float *%ptr
  %val12 = load volatile float, float *%ptr
  %val13 = load volatile float, float *%ptr
  %val14 = load volatile float, float *%ptr
  %val15 = load volatile float, float *%ptr
  %val16 = load volatile float, float *%ptr

  %sqrt0 = call float @llvm.sqrt.f32(float %val0)
  %sqrt1 = call float @llvm.sqrt.f32(float %val1)
  %sqrt2 = call float @llvm.sqrt.f32(float %val2)
  %sqrt3 = call float @llvm.sqrt.f32(float %val3)
  %sqrt4 = call float @llvm.sqrt.f32(float %val4)
  %sqrt5 = call float @llvm.sqrt.f32(float %val5)
  %sqrt6 = call float @llvm.sqrt.f32(float %val6)
  %sqrt7 = call float @llvm.sqrt.f32(float %val7)
  %sqrt8 = call float @llvm.sqrt.f32(float %val8)
  %sqrt9 = call float @llvm.sqrt.f32(float %val9)
  %sqrt10 = call float @llvm.sqrt.f32(float %val10)
  %sqrt11 = call float @llvm.sqrt.f32(float %val11)
  %sqrt12 = call float @llvm.sqrt.f32(float %val12)
  %sqrt13 = call float @llvm.sqrt.f32(float %val13)
  %sqrt14 = call float @llvm.sqrt.f32(float %val14)
  %sqrt15 = call float @llvm.sqrt.f32(float %val15)
  %sqrt16 = call float @llvm.sqrt.f32(float %val16)

  store volatile float %val0, float *%ptr
  store volatile float %val1, float *%ptr
  store volatile float %val2, float *%ptr
  store volatile float %val3, float *%ptr
  store volatile float %val4, float *%ptr
  store volatile float %val5, float *%ptr
  store volatile float %val6, float *%ptr
  store volatile float %val7, float *%ptr
  store volatile float %val8, float *%ptr
  store volatile float %val9, float *%ptr
  store volatile float %val10, float *%ptr
  store volatile float %val11, float *%ptr
  store volatile float %val12, float *%ptr
  store volatile float %val13, float *%ptr
  store volatile float %val14, float *%ptr
  store volatile float %val15, float *%ptr
  store volatile float %val16, float *%ptr

  store volatile float %sqrt0, float *%ptr
  store volatile float %sqrt1, float *%ptr
  store volatile float %sqrt2, float *%ptr
  store volatile float %sqrt3, float *%ptr
  store volatile float %sqrt4, float *%ptr
  store volatile float %sqrt5, float *%ptr
  store volatile float %sqrt6, float *%ptr
  store volatile float %sqrt7, float *%ptr
  store volatile float %sqrt8, float *%ptr
  store volatile float %sqrt9, float *%ptr
  store volatile float %sqrt10, float *%ptr
  store volatile float %sqrt11, float *%ptr
  store volatile float %sqrt12, float *%ptr
  store volatile float %sqrt13, float *%ptr
  store volatile float %sqrt14, float *%ptr
  store volatile float %sqrt15, float *%ptr
  store volatile float %sqrt16, float *%ptr

  ret void
}

; Check that a call to the normal sqrtf function is lowered.
; The expected code takes the square root of %f2 inline into %f0, compares the
; result with itself and conditionally returns; otherwise it copies the
; original argument from %f2 to %f0 and tail-calls the library routine.
; NOTE(review): the self-compare presumably detects a NaN result so that the
; library call handles that case -- confirm against the target documentation.
define float @f8(float %dummy, float %val) {
; CHECK-LABEL: f8:
; CHECK: sqebr %f0, %f2
; CHECK: cebr %f0, %f0
; CHECK: bnor %r14
; CHECK: {{ler|ldr}} %f0, %f2
; CHECK: jg sqrtf@PLT
  %res = tail call float @sqrtf(float %val)
  ret float %res
}