1; RUN: llc < %s -march=x86 | FileCheck %s
2
3define i32 @t1(i8* %X, i32 %i) {
; Loads an i32 from %X at byte offset (%i << 2) & 1020. Because 1020 is
; 0xFF << 2, that offset equals (%i & 0xFF) * 4. The directives below expect
; the mask to show up as a zero-extending byte move (movzbl) with no `and`
; ahead of it, and the multiply by 4 to be folded into the scale of the
; load's addressing mode.
4; CHECK-LABEL: t1:
5; CHECK-NOT: and
6; CHECK: movzbl
7; CHECK: movl (%{{...}},%{{...}},4),
8; CHECK: ret
9
10entry:
11  %tmp2 = shl i32 %i, 2                       ; %i * 4
12  %tmp4 = and i32 %tmp2, 1020                 ; keep bits 2..9, i.e. (%i & 0xFF) * 4
13  %tmp7 = getelementptr i8* %X, i32 %tmp4     ; i8 elements, so %tmp4 is a byte offset
14  %tmp78 = bitcast i8* %tmp7 to i32*          ; reinterpret the address for a 32-bit load
15  %tmp9 = load i32* %tmp78
16  ret i32 %tmp9
17}
18
19define i32 @t2(i16* %X, i32 %i) {
; 16-bit counterpart of @t1. The index is (%i << 1) & 131070, and 131070 is
; 0xFFFF << 1, so the index equals (%i & 0xFFFF) * 2. It is applied to an i16
; pointer, which doubles it again, giving a byte offset of (%i & 0xFFFF) * 4.
; The directives below expect a zero-extending word move (movzwl) with no
; `and` ahead of it, followed by a load using a scale-4 addressing mode.
20; CHECK-LABEL: t2:
21; CHECK-NOT: and
22; CHECK: movzwl
23; CHECK: movl (%{{...}},%{{...}},4),
24; CHECK: ret
25
26entry:
27  %tmp2 = shl i32 %i, 1                       ; %i * 2
28  %tmp4 = and i32 %tmp2, 131070               ; keep bits 1..16, i.e. (%i & 0xFFFF) * 2
29  %tmp7 = getelementptr i16* %X, i32 %tmp4    ; i16 elements: index is scaled by 2 bytes
30  %tmp78 = bitcast i16* %tmp7 to i32*         ; reinterpret the address for a 32-bit load
31  %tmp9 = load i32* %tmp78
32  ret i32 %tmp9
33}
34
35define i32 @t3(i16* %i.ptr, i32* %arr) {
36; This case is tricky. The lshr followed by a gep will produce a lshr followed
37; by an and to remove the low bits. This can be simplified by doing the lshr by
38; a greater constant and using the addressing mode to scale the result back up.
39; To make matters worse, because of the two-phase zext of %i and their reuse in
40; the function, the DAG can get confusing trying to re-use both of them and
41; prevent easy analysis of the mask in order to match this.
;
; The directives below expect a shrl followed by an addl whose memory operand
; uses a scale of 4, with no `and` emitted before the shrl.
42; CHECK-LABEL: t3:
43; CHECK-NOT: and
44; CHECK: shrl
45; CHECK: addl (%{{...}},%{{...}},4),
46; CHECK: ret
47
48entry:
49  %i = load i16* %i.ptr
50  %i.zext = zext i16 %i to i32                              ; used for the index and for the sum
51  %index = lshr i32 %i.zext, 11                             ; top 5 bits of the 16-bit value
52  %val.ptr = getelementptr inbounds i32* %arr, i32 %index   ; i32 elements: index scaled by 4
53  %val = load i32* %val.ptr
54  %sum = add i32 %val, %i.zext                              ; second use of the zero-extended value
55  ret i32 %sum
56}
57
58define i32 @t4(i16* %i.ptr, i32* %arr) {
59; A version of @t3 that has more zero extends and more re-use of intermediate
60; values. This exercises slightly different bits of canonicalization.
;
; Compared with @t3, the shifted index is additionally zero-extended to i64
; before the gep, and the i32 index is itself added into the result. The
; expected output is the same shape: a shrl followed by an addl whose memory
; operand uses a scale of 4, with no `and` emitted before the shrl.
61; CHECK-LABEL: t4:
62; CHECK-NOT: and
63; CHECK: shrl
64; CHECK: addl (%{{...}},%{{...}},4),
65; CHECK: ret
66
67entry:
68  %i = load i16* %i.ptr
69  %i.zext = zext i16 %i to i32                                   ; used for the index and for %sum.1
70  %index = lshr i32 %i.zext, 11                                  ; top 5 bits of the 16-bit value
71  %index.zext = zext i32 %index to i64                           ; extra zext layered on the index
72  %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext   ; i32 elements: index scaled by 4
73  %val = load i32* %val.ptr
74  %sum.1 = add i32 %val, %i.zext                                 ; reuses the zero-extended value
75  %sum.2 = add i32 %sum.1, %index                                ; reuses the 32-bit index as well
76  ret i32 %sum.2
77}
78