; Test insertions of memory into the low byte of an i64.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; Plain insertion of the form (or (and ... ~0xff) (zext (load ...))).
; The mask is -256, so only the low byte of the register is cleared and
; the test expects the whole sequence to become a single IC with no NI.
define i64 @f1(i64 %orig, i8 *%ptr) {
; CHECK-LABEL: f1:
; CHECK-NOT: ni
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
  %byte = load i8, i8 *%ptr
  %ext = zext i8 %byte to i64
  %masked = and i64 %orig, -256
  %res = or i64 %masked, %ext
  ret i64 %res
}

; Same as f1, except that the OR takes the loaded byte as its first operand.
define i64 @f2(i64 %orig, i8 *%ptr) {
; CHECK-LABEL: f2:
; CHECK-NOT: ni
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
  %byte = load i8, i8 *%ptr
  %ext = zext i8 %byte to i64
  %masked = and i64 %orig, -256
  %res = or i64 %ext, %masked
  ret i64 %res
}

; The mask here is -512, which clears bit 8 in addition to the low byte.
; IC is still expected, but the original mask must survive as a NILL
; (65024 is 0xfe00, the low halfword of -512).
define i64 @f3(i64 %orig, i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: nill %r2, 65024
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
  %byte = load i8, i8 *%ptr
  %ext = zext i8 %byte to i64
  %masked = and i64 %orig, -512
  %res = or i64 %masked, %ext
  ret i64 %res
}

; Same as f3, except that the OR takes the loaded byte as its first operand.
define i64 @f4(i64 %orig, i8 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: nill %r2, 65024
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
  %byte = load i8, i8 *%ptr
  %ext = zext i8 %byte to i64
  %masked = and i64 %orig, -512
  %res = or i64 %ext, %masked
  ret i64 %res
}

; The low 8 bits are zero because of a left shift by 8 rather than an AND.
; The test expects the shift (SLLG) followed by an IC.
define i64 @f5(i64 %orig, i8 *%ptr) {
; CHECK-LABEL: f5:
; CHECK: sllg %r2, %r2, 8
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
  %byte = load i8, i8 *%ptr
  %ext = zext i8 %byte to i64
  %shifted = shl i64 %orig, 8
  %res = or i64 %shifted, %ext
  ret i64 %res
}

; Like f5, but with the operands reversed.
define i64 @f6(i64 %orig, i8 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: sllg %r2, %r2, 8
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
  %byte = load i8, i8 *%ptr
  %ext = zext i8 %byte to i64
  %shifted = shl i64 %orig, 8
  %res = or i64 %ext, %shifted
  ret i64 %res
}

; Insertion into a constant: the zero-extended byte is ORed with 256.
; The test expects the constant to be loaded with LGHI and the byte to be
; inserted with IC.
define i64 @f7(i64 %orig, i8 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: lghi %r2, 256
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
  %byte = load i8, i8 *%ptr
  %ext = zext i8 %byte to i64
  %res = or i64 %ext, 256
  ret i64 %res
}

; Same as f7, except that the constant is the first operand of the OR.
define i64 @f8(i64 %orig, i8 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: lghi %r2, 256
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
  %byte = load i8, i8 *%ptr
  %ext = zext i8 %byte to i64
  %res = or i64 256, %ext
  ret i64 %res
}

; Displacement 4095: the high end of the IC range.
define i64 @f9(i64 %orig, i8 *%src) {
; CHECK-LABEL: f9:
; CHECK: ic %r2, 4095(%r3)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%src, i64 4095
  %byte = load i8, i8 *%addr
  %ext = zext i8 %byte to i64
  %masked = and i64 %orig, -256
  %res = or i64 %ext, %masked
  ret i64 %res
}

; Displacement 4096: one byte past the IC range, so ICY is expected instead.
define i64 @f10(i64 %orig, i8 *%src) {
; CHECK-LABEL: f10:
; CHECK: icy %r2, 4096(%r3)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%src, i64 4096
  %byte = load i8, i8 *%addr
  %ext = zext i8 %byte to i64
  %masked = and i64 %orig, -256
  %res = or i64 %ext, %masked
  ret i64 %res
}

; Displacement 524287: the high end of the ICY range.
define i64 @f11(i64 %orig, i8 *%src) {
; CHECK-LABEL: f11:
; CHECK: icy %r2, 524287(%r3)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%src, i64 524287
  %byte = load i8, i8 *%addr
  %ext = zext i8 %byte to i64
  %masked = and i64 %orig, -256
  %res = or i64 %ext, %masked
  ret i64 %res
}

; Check the next byte up, which needs separate address logic.
; Other sequences besides this one would be OK.
define i64 @f12(i64 %orig, i8 *%src) {
; CHECK-LABEL: f12:
; CHECK: agfi %r3, 524288
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%src, i64 524288
  %byte = load i8, i8 *%addr
  %ext = zext i8 %byte to i64
  %masked = and i64 %orig, -256
  %res = or i64 %ext, %masked
  ret i64 %res
}

; Displacement -1: the high end of the negative ICY range.
define i64 @f13(i64 %orig, i8 *%src) {
; CHECK-LABEL: f13:
; CHECK: icy %r2, -1(%r3)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%src, i64 -1
  %byte = load i8, i8 *%addr
  %ext = zext i8 %byte to i64
  %masked = and i64 %orig, -256
  %res = or i64 %ext, %masked
  ret i64 %res
}

; Displacement -524288: the low end of the ICY range.
define i64 @f14(i64 %orig, i8 *%src) {
; CHECK-LABEL: f14:
; CHECK: icy %r2, -524288(%r3)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%src, i64 -524288
  %byte = load i8, i8 *%addr
  %ext = zext i8 %byte to i64
  %masked = and i64 %orig, -256
  %res = or i64 %ext, %masked
  ret i64 %res
}

; Displacement -524289: one byte below the ICY range, so the address has to
; be computed separately (here with AGFI) before a plain IC.
; Other sequences besides this one would be OK.
define i64 @f15(i64 %orig, i8 *%src) {
; CHECK-LABEL: f15:
; CHECK: agfi %r3, -524289
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%src, i64 -524289
  %byte = load i8, i8 *%addr
  %ext = zext i8 %byte to i64
  %masked = and i64 %orig, -256
  %res = or i64 %ext, %masked
  ret i64 %res
}

; Check that IC allows an index.
define i64 @f16(i64 %orig, i8 *%src, i64 %index) {
; CHECK-LABEL: f16:
; CHECK: ic %r2, 4095({{%r4,%r3|%r3,%r4}})
; CHECK: br %r14
  %indexed = getelementptr i8, i8 *%src, i64 %index
  %addr = getelementptr i8, i8 *%indexed, i64 4095
  %byte = load i8, i8 *%addr
  %ext = zext i8 %byte to i64
  %masked = and i64 %orig, -256
  %res = or i64 %ext, %masked
  ret i64 %res
}

; Base plus index plus displacement 4096: ICY is expected to accept the
; index register as well (either register may serve as base or index).
define i64 @f17(i64 %orig, i8 *%src, i64 %index) {
; CHECK-LABEL: f17:
; CHECK: icy %r2, 4096({{%r4,%r3|%r3,%r4}})
; CHECK: br %r14
  %indexed = getelementptr i8, i8 *%src, i64 %index
  %addr = getelementptr i8, i8 *%indexed, i64 4096
  %byte = load i8, i8 *%addr
  %ext = zext i8 %byte to i64
  %masked = and i64 %orig, -256
  %res = or i64 %ext, %masked
  ret i64 %res
}