; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=BSWAP
; RUN: llc < %s -mtriple=i686-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=MOVBE
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64 --check-prefix=BSWAP64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK64 --check-prefix=MOVBE64
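
; The tests in this file exercise the DAG combiner's load-combine transform:
; chains of narrow loads that are zero-extended, shifted, and or'ed back
; together into one wide value are folded into a single wide load, plus a byte
; swap when the bytes are assembled in big-endian order. As a rough C-level
; sketch (a hypothetical helper using <stdint.h> types, not part of the test),
; the first function below corresponds to:
;
;   uint32_t read_le32(const uint8_t *p) {
;     return (uint32_t)p[0]         | ((uint32_t)p[1] << 8) |
;            ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
;   }
;
; On little-endian x86 this is equivalent to a plain unaligned 32-bit load.
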
; i8* p;
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 8
  %tmp7 = or i32 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 16
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 24
  %tmp17 = or i32 %tmp12, %tmp16
  ret i32 %tmp17
}

; i8* p;
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl (%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i8_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel (%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i8_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl (%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i8_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel (%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}
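
; A C-level sketch of the big-endian read tested above (hypothetical helper):
;
;   uint32_t read_be32(const uint8_t *p) {
;     return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
;            ((uint32_t)p[2] << 8)  | (uint32_t)p[3];
;   }
;
; The combine produces one 32-bit load followed by a byte swap, which is
; matched as a single MOVBE load when the feature is available.
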
; i16* p;
; (i32) p[0] | ((i32) p[1] << 16)
define i32 @load_i32_by_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i16* p_16;
; i8* p_8 = (i8*) p_16;
; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
define i32 @load_i32_by_i16_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i16_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = bitcast i32* %arg to i8*
  %tmp2 = load i16, i16* %tmp, align 1
  %tmp3 = zext i16 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp1, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = getelementptr inbounds i8, i8* %tmp1, i32 3
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 24
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = or i32 %tmp12, %tmp3
  ret i32 %tmp13
}

; i8* p;
; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
define i32 @load_i32_by_i16_by_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp5, 8
  %tmp7 = or i16 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp13, 8
  %tmp15 = or i16 %tmp14, %tmp10
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp17, 16
  %tmp19 = or i32 %tmp18, %tmp16
  ret i32 %tmp19
}

; i8* p;
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
define i32 @load_i32_by_i16_by_i8_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl (%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel (%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl (%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel (%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp2, 8
  %tmp7 = or i16 %tmp6, %tmp5
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp10, 8
  %tmp15 = or i16 %tmp14, %tmp13
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp16, 16
  %tmp19 = or i32 %tmp18, %tmp17
  ret i32 %tmp19
}

; i8* p;
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl (%ecx), %eax
; CHECK-NEXT:    movl 4(%ecx), %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i64_by_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movq (%rdi), %rax
; CHECK64-NEXT:    retq
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}
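
; The 64-bit little-endian analogue of read_le32 (hypothetical helper):
;
;   uint64_t read_le64(const uint8_t *p) {
;     uint64_t v = 0;
;     for (int i = 0; i < 8; ++i)
;       v |= (uint64_t)p[i] << (8 * i);
;     return v;
;   }
;
; On x86-64 this folds to a single 64-bit load; on i686 the i64 result is
; returned in the EDX:EAX pair, hence the two 32-bit loads checked above.
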
; i8* p;
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; BSWAP-LABEL: load_i64_by_i8_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl (%eax), %edx
; BSWAP-NEXT:    movl 4(%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    bswapl %edx
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i64_by_i8_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; MOVBE-NEXT:    movbel 4(%ecx), %eax
; MOVBE-NEXT:    movbel (%ecx), %edx
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i64_by_i8_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movq (%rdi), %rax
; BSWAP64-NEXT:    bswapq %rax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i64_by_i8_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbeq (%rdi), %rax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}

; Part of the load-by-bytes pattern is used outside of the pattern, so the
; fold is blocked.
; i8* p;
; i32 x = (i32) p[1]
; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
; x | res
define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_uses:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    .cfi_offset %esi, -8
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    shll $24, %ecx
; CHECK-NEXT:    movzbl 1(%eax), %edx
; CHECK-NEXT:    movl %edx, %esi
; CHECK-NEXT:    shll $16, %esi
; CHECK-NEXT:    orl %ecx, %esi
; CHECK-NEXT:    movzbl 2(%eax), %ecx
; CHECK-NEXT:    shll $8, %ecx
; CHECK-NEXT:    orl %esi, %ecx
; CHECK-NEXT:    movzbl 3(%eax), %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    orl %edx, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 4
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    movl %ecx, %edx
; CHECK64-NEXT:    shll $16, %edx
; CHECK64-NEXT:    orl %eax, %edx
; CHECK64-NEXT:    movzbl 2(%rdi), %esi
; CHECK64-NEXT:    shll $8, %esi
; CHECK64-NEXT:    orl %edx, %esi
; CHECK64-NEXT:    movzbl 3(%rdi), %eax
; CHECK64-NEXT:    orl %esi, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ; Use an individual part of the pattern outside of the pattern
  %tmp18 = or i32 %tmp6, %tmp17
  ret i32 %tmp18
}
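
; Roughly what the blocked case above looks like in C (hypothetical helper):
;
;   uint32_t read_be32_extra_use(const uint8_t *p) {
;     uint32_t x = p[1];
;     uint32_t res = ((uint32_t)p[0] << 24) | (x << 16) |
;                    ((uint32_t)p[2] << 8)  | (uint32_t)p[3];
;     return x | res;
;   }
;
; Because x is live outside the byte-assembly pattern, the individual byte
; loads have to be kept and no wide load is formed.
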
; One of the loads is volatile
; i8* p;
; p0 = volatile *p;
; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_volatile(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    shll $24, %ecx
; CHECK-NEXT:    movzbl 1(%eax), %edx
; CHECK-NEXT:    shll $16, %edx
; CHECK-NEXT:    orl %ecx, %edx
; CHECK-NEXT:    movzbl 2(%eax), %ecx
; CHECK-NEXT:    shll $8, %ecx
; CHECK-NEXT:    orl %edx, %ecx
; CHECK-NEXT:    movzbl 3(%eax), %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    orl %eax, %ecx
; CHECK64-NEXT:    movzbl 2(%rdi), %edx
; CHECK64-NEXT:    shll $8, %edx
; CHECK64-NEXT:    orl %ecx, %edx
; CHECK64-NEXT:    movzbl 3(%rdi), %eax
; CHECK64-NEXT:    orl %edx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load volatile i8, i8* %tmp, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; There is a store in between individual loads
; i8* p, q;
; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
; *q = 0;
; res2 = ((i32) p[2] << 8) | (i32) p[3]
; res1 | res2
define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    .cfi_offset %esi, -8
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movzbl (%ecx), %edx
; CHECK-NEXT:    shll $24, %edx
; CHECK-NEXT:    movzbl 1(%ecx), %esi
; CHECK-NEXT:    movl $0, (%eax)
; CHECK-NEXT:    shll $16, %esi
; CHECK-NEXT:    orl %edx, %esi
; CHECK-NEXT:    movzbl 2(%ecx), %edx
; CHECK-NEXT:    shll $8, %edx
; CHECK-NEXT:    orl %esi, %edx
; CHECK-NEXT:    movzbl 3(%ecx), %eax
; CHECK-NEXT:    orl %edx, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 4
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    movl $0, (%rsi)
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    orl %eax, %ecx
; CHECK64-NEXT:    movzbl 2(%rdi), %edx
; CHECK64-NEXT:    shll $8, %edx
; CHECK64-NEXT:    orl %ecx, %edx
; CHECK64-NEXT:    movzbl 3(%rdi), %eax
; CHECK64-NEXT:    orl %edx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = load i8, i8* %tmp, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = shl nuw nsw i32 %tmp3, 24
  %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp6 = load i8, i8* %tmp5, align 1
  ; This store will prevent folding of the pattern
  store i32 0, i32* %arg1
  %tmp7 = zext i8 %tmp6 to i32
  %tmp8 = shl nuw nsw i32 %tmp7, 16
  %tmp9 = or i32 %tmp8, %tmp4
  %tmp10 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp9, %tmp13
  %tmp15 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp16 = load i8, i8* %tmp15, align 1
  %tmp17 = zext i8 %tmp16 to i32
  %tmp18 = or i32 %tmp14, %tmp17
  ret i32 %tmp18
}
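
; A C-level sketch of the case above (hypothetical helper); the store to q may
; alias the bytes being loaded, so the loads cannot be merged across it:
;
;   uint32_t read_be32_store_between(const uint8_t *p, uint32_t *q) {
;     uint32_t res1 = ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16);
;     *q = 0;
;     uint32_t res2 = ((uint32_t)p[2] << 8) | (uint32_t)p[3];
;     return res1 | res2;
;   }
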
; One of the loads is from an unrelated location
; i8* p, q;
; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movzbl (%ecx), %edx
; CHECK-NEXT:    shll $24, %edx
; CHECK-NEXT:    movzbl 1(%eax), %eax
; CHECK-NEXT:    shll $16, %eax
; CHECK-NEXT:    orl %edx, %eax
; CHECK-NEXT:    movzbl 2(%ecx), %edx
; CHECK-NEXT:    shll $8, %edx
; CHECK-NEXT:    orl %eax, %edx
; CHECK-NEXT:    movzbl 3(%ecx), %eax
; CHECK-NEXT:    orl %edx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    movzbl 1(%rsi), %ecx
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    orl %eax, %ecx
; CHECK64-NEXT:    movzbl 2(%rdi), %edx
; CHECK64-NEXT:    shll $8, %edx
; CHECK64-NEXT:    orl %ecx, %edx
; CHECK64-NEXT:    movzbl 3(%rdi), %eax
; CHECK64-NEXT:    orl %edx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = bitcast i32* %arg1 to i8*
  %tmp3 = load i8, i8* %tmp, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  ; Load from an unrelated address
  %tmp6 = getelementptr inbounds i8, i8* %tmp2, i32 1
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = shl nuw nsw i32 %tmp8, 16
  %tmp10 = or i32 %tmp9, %tmp5
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i32
  %tmp14 = shl nuw nsw i32 %tmp13, 8
  %tmp15 = or i32 %tmp10, %tmp14
  %tmp16 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = or i32 %tmp15, %tmp18
  ret i32 %tmp19
}

; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl 1(%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl 1(%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl -4(%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl -4(%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl 1(%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel 1(%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl 1(%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel 1(%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p;
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; BSWAP-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl -4(%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel -4(%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl -4(%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel -4(%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; i32 i;
; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {
; BSWAP-LABEL: load_i32_by_i8_bswap_base_index_offset:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; BSWAP-NEXT:    movl (%ecx,%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_i8_bswap_base_index_offset:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; MOVBE-NEXT:    movbel (%ecx,%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movslq %esi, %rax
; BSWAP64-NEXT:    movl (%rdi,%rax), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movslq %esi, %rax
; MOVBE64-NEXT:    movbel (%rdi,%rax), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp2 = getelementptr inbounds i8, i8* %tmp, i32 %arg1
  %tmp3 = load i8, i8* %tmp2, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  %tmp6 = add nuw nsw i32 %arg1, 1
  %tmp7 = getelementptr inbounds i8, i8* %tmp, i32 %tmp6
  %tmp8 = load i8, i8* %tmp7, align 1
  %tmp9 = zext i8 %tmp8 to i32
  %tmp10 = shl nuw nsw i32 %tmp9, 16
  %tmp11 = or i32 %tmp10, %tmp5
  %tmp12 = add nuw nsw i32 %arg1, 2
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 %tmp12
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 8
  %tmp17 = or i32 %tmp11, %tmp16
  %tmp18 = add nuw nsw i32 %arg1, 3
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i32 %tmp18
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i32
  %tmp22 = or i32 %tmp17, %tmp21
  ret i32 %tmp22
}
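
; The test above is read_be32 applied at a variable offset; as a C-level
; sketch (hypothetical helper):
;
;   uint32_t read_be32_at(const uint8_t *p, int i) {
;     return ((uint32_t)p[i] << 24)    | ((uint32_t)p[i + 1] << 16) |
;            ((uint32_t)p[i + 2] << 8) | (uint32_t)p[i + 3];
;   }
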
; Verify that we don't crash handling shl i32 %conv57, 32 (a shift by the full
; bit width of the type)
define void @shift_i32_by_32(i8* %src1, i8* %src2, i64* %dst) {
; CHECK-LABEL: shift_i32_by_32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl $-1, 4(%eax)
; CHECK-NEXT:    movl $-1, (%eax)
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: shift_i32_by_32:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    movq $-1, (%rdx)
; CHECK64-NEXT:    retq
entry:
  %load1 = load i8, i8* %src1, align 1
  %conv46 = zext i8 %load1 to i32
  %shl47 = shl i32 %conv46, 56
  %or55 = or i32 %shl47, 0
  %load2 = load i8, i8* %src2, align 1
  %conv57 = zext i8 %load2 to i32
  %shl58 = shl i32 %conv57, 32
  %or59 = or i32 %or55, %shl58
  %or74 = or i32 %or59, 0
  %conv75 = sext i32 %or74 to i64
  store i64 %conv75, i64* %dst, align 8
  ret void
}

declare i16 @llvm.bswap.i16(i16)

; i16* p;
; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; BSWAP-LABEL: load_i32_by_bswap_i16:
; BSWAP:       # %bb.0:
; BSWAP-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT:    movl (%eax), %eax
; BSWAP-NEXT:    bswapl %eax
; BSWAP-NEXT:    retl
;
; MOVBE-LABEL: load_i32_by_bswap_i16:
; MOVBE:       # %bb.0:
; MOVBE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT:    movbel (%eax), %eax
; MOVBE-NEXT:    retl
;
; BSWAP64-LABEL: load_i32_by_bswap_i16:
; BSWAP64:       # %bb.0:
; BSWAP64-NEXT:    movl (%rdi), %eax
; BSWAP64-NEXT:    bswapl %eax
; BSWAP64-NEXT:    retq
;
; MOVBE64-LABEL: load_i32_by_bswap_i16:
; MOVBE64:       # %bb.0:
; MOVBE64-NEXT:    movbel (%rdi), %eax
; MOVBE64-NEXT:    retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}
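
; A C-level sketch using the 16-bit byte-swap builtin (hypothetical helper):
;
;   uint32_t read_be32_by_bswap16(const uint16_t *p) {
;     return ((uint32_t)__builtin_bswap16(p[0]) << 16) |
;            (uint32_t)__builtin_bswap16(p[1]);
;   }
;
; The two byte-swapped i16 loads combine into one i32 load plus a single
; 32-bit byte swap.
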
; i16* p;
; (i32) p[0] | ((sext(p[1]) to i32) << 16)
; The sign-extension bits are shifted out by the shl 16, so the fold still
; produces a plain 32-bit load.
define i32 @load_i32_by_sext_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_sext_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
; CHECK64-NEXT:    retq
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
  %tmp7 = load i8, i8* %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
  %tmp23 = load i8, i8* %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 13(%eax,%ecx), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 13(%rax,%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
  %tmp33 = load i8, i8* %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
  %tmp39 = load i8, i8* %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
  %tmp45 = load i8, i8* %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}

; i8* arg; i32 i;
;
; p0 = arg;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
;
; This test exercises zero- and any-extend loads as part of the load combine
; pattern. In order to fold the pattern above we need to reassociate the
; address computation first. By the time the address computation is
; reassociated, the loads have been combined to zext and aext loads.
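; Concretely, the first byte below is addressed as (arg + 12) + i while the
; remaining bytes are addressed as (arg + (i + k)) + 12 for k = 1..3; the
; combiner has to reassociate these onto a common base before it can prove
; that the four loads are adjacent.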
define i32 @load_i32_by_i8_zaext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zaext_loads:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
; CHECK64-NEXT:    retq
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = zext i8 %tmp48 to i32
  %tmp50 = shl nuw i32 %tmp49, 24
  %tmp51 = or i32 %tmp46, %tmp50
  ret i32 %tmp51
}

; The same as load_i32_by_i8_zaext_loads but the last load is combined to
; a sext load.
;
; i8* arg; i32 i;
;
; p0 = arg;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
define i32 @load_i32_by_i8_zsext_loads(i8* %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zsext_loads:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    movl 12(%rdi,%rax), %eax
; CHECK64-NEXT:    retq
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
  %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
  %tmp38 = load i8, i8* %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
  %tmp43 = load i8, i8* %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
  %tmp48 = load i8, i8* %tmp47, align 1
  %tmp49 = sext i8 %tmp48 to i16
  %tmp50 = zext i16 %tmp49 to i32
  %tmp51 = shl nuw i32 %tmp50, 24
  %tmp52 = or i32 %tmp46, %tmp51
  ret i32 %tmp52
}

; i8* p;
; (i32) p[0] | ((i32) p[1] << 8)
define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzwl (%eax), %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzwl (%rdi), %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; i8* p;
; ((i32) p[0] << 8) | ((i32) p[1] << 16)
define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    shll $8, %ecx
; CHECK-NEXT:    movzbl 1(%eax), %eax
; CHECK-NEXT:    shll $16, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %ecx
; CHECK64-NEXT:    shll $8, %ecx
; CHECK64-NEXT:    movzbl 1(%rdi), %eax
; CHECK64-NEXT:    shll $16, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p;
; ((i32) p[0] << 16) | ((i32) p[1] << 24)
define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl (%eax), %ecx
; CHECK-NEXT:    shll $16, %ecx
; CHECK-NEXT:    movzbl 1(%eax), %eax
; CHECK-NEXT:    shll $24, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl (%rdi), %ecx
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    movzbl 1(%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p;
; (i32) p[1] | ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzwl (%eax), %eax
; CHECK-NEXT:    shll $16, %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzwl (%rdi), %eax
; CHECK64-NEXT:    shll $16, %eax
; CHECK64-NEXT:    bswapl %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; i8* p;
; ((i32) p[1] << 8) | ((i32) p[0] << 16)
define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl 1(%eax), %ecx
; CHECK-NEXT:    shll $8, %ecx
; CHECK-NEXT:    movzbl (%eax), %eax
; CHECK-NEXT:    shll $16, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    shll $8, %ecx
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $16, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p;
; ((i32) p[1] << 16) | ((i32) p[0] << 24)
define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movzbl 1(%eax), %ecx
; CHECK-NEXT:    shll $16, %ecx
; CHECK-NEXT:    movzbl (%eax), %eax
; CHECK-NEXT:    shll $24, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl 1(%rdi), %ecx
; CHECK64-NEXT:    shll $16, %ecx
; CHECK64-NEXT:    movzbl (%rdi), %eax
; CHECK64-NEXT:    shll $24, %eax
; CHECK64-NEXT:    orl %ecx, %eax
; CHECK64-NEXT:    retq
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}