; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; bswap should be constant folded when it is passed a constant argument

; RUN: llc < %s -mtriple=i686-- -mcpu=i686 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=CHECK64

; Declarations of the byte-swap intrinsics exercised below.
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)

define i16 @W(i16 %A) {
; CHECK-LABEL: W:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    rolw $8, %ax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: W:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %edi, %eax
; CHECK64-NEXT:    rolw $8, %ax
; CHECK64-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK64-NEXT:    retq
; i16 bswap is lowered to a 16-bit rotate-left by 8.
        %Z = call i16 @llvm.bswap.i16( i16 %A )         ; <i16> [#uses=1]
        ret i16 %Z
}

define dso_local i32 @X(i32 %A) {
; CHECK-LABEL: X:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: X:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %edi, %eax
; CHECK64-NEXT:    bswapl %eax
; CHECK64-NEXT:    retq
; i32 bswap selects the bswapl instruction directly.
        %Z = call i32 @llvm.bswap.i32( i32 %A )         ; <i32> [#uses=1]
        ret i32 %Z
}

define i64 @Y(i64 %A) {
; CHECK-LABEL: Y:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    bswapl %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: Y:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movq %rdi, %rax
; CHECK64-NEXT:    bswapq %rax
; CHECK64-NEXT:    retq
; i64 bswap: one bswapq on x86-64; on i686 it splits into two bswapls
; with the register halves exchanged.
        %Z = call i64 @llvm.bswap.i64( i64 %A )         ; <i64> [#uses=1]
        ret i64 %Z
}

; This isn't really a bswap test, but the potential problem is
; easier to see with bswap vs. other ops. The transform in
; question starts with a bitwise logic op and tries to hoist
; those ahead of other ops. But that's not generally profitable
; when the other ops have other uses (and it might not be safe
; either due to unconstrained instruction count growth).

define dso_local i32 @bswap_multiuse(i32 %x, i32 %y, i32* %p1, i32* %p2) nounwind {
; CHECK-LABEL: bswap_multiuse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    bswapl %esi
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    movl %esi, (%edx)
; CHECK-NEXT:    movl %eax, (%ecx)
; CHECK-NEXT:    orl %esi, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: bswap_multiuse:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    bswapl %edi
; CHECK64-NEXT:    bswapl %eax
; CHECK64-NEXT:    movl %edi, (%rdx)
; CHECK64-NEXT:    movl %eax, (%rcx)
; CHECK64-NEXT:    orl %edi, %eax
; CHECK64-NEXT:    retq
; Both bswap results have extra uses (the stores), so the 'or' must not
; be hoisted ahead of the bswaps.
  %xt = call i32 @llvm.bswap.i32(i32 %x)
  %yt = call i32 @llvm.bswap.i32(i32 %y)
  store i32 %xt, i32* %p1
  store i32 %yt, i32* %p2
  %r = or i32 %xt, %yt
  ret i32 %r
}

; rdar://9164521
define dso_local i32 @test1(i32 %a) nounwind readnone {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    shrl $16, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: test1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %edi, %eax
; CHECK64-NEXT:    bswapl %eax
; CHECK64-NEXT:    shrl $16, %eax
; CHECK64-NEXT:    retq
; ((a >> 8) & 0xff) | ((a << 8) & 0xff00) swaps the low two bytes,
; which is recognized as bswap + shr by 16.
  %and = lshr i32 %a, 8
  %shr3 = and i32 %and, 255
  %and2 = shl i32 %a, 8
  %shl = and i32 %and2, 65280
  %or = or i32 %shr3, %shl
  ret i32 %or
}

define dso_local i32 @test2(i32 %a) nounwind readnone {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    sarl $16, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: test2:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %edi, %eax
; CHECK64-NEXT:    bswapl %eax
; CHECK64-NEXT:    sarl $16, %eax
; CHECK64-NEXT:    retq
; Same low-16-bit byte swap as test1, but the result is sign-extended
; (shl 16 + ashr 16), so the shift becomes arithmetic (sar).
  %and = lshr i32 %a, 8
  %shr4 = and i32 %and, 255
  %and2 = shl i32 %a, 8
  %or = or i32 %shr4, %and2
  %sext = shl i32 %or, 16
  %conv3 = ashr exact i32 %sext, 16
  ret i32 %conv3
}

; Globals loaded by the partial-bswap tests below.
@var8 = dso_local global i8 0
@var16 = dso_local global i16 0

; The "shl" below can move bits into the high parts of the value, so the
; operation is not a "bswap, shr" pair.

; rdar://problem/14814049
define i64 @not_bswap() {
; CHECK-LABEL: not_bswap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzwl var16, %eax
; CHECK-NEXT:    movl %eax, %ecx
; CHECK-NEXT:    shrl $8, %ecx
; CHECK-NEXT:    shll $8, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: not_bswap:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzwl var16(%rip), %eax
; CHECK64-NEXT:    movq %rax, %rcx
; CHECK64-NEXT:    shrq $8, %rcx
; CHECK64-NEXT:    shlq $8, %rax
; CHECK64-NEXT:    orq %rcx, %rax
; CHECK64-NEXT:    retq
  %init = load i16, i16* @var16
  %big = zext i16 %init to i64

  %hishifted = lshr i64 %big, 8
  %loshifted = shl i64 %big, 8

  %notswapped = or i64 %hishifted, %loshifted

  ret i64 %notswapped
}

; This time, the lshr (and subsequent or) is completely useless. While it's
; technically correct to convert this into a "bswap, shr", it's suboptimal. A
; simple shl works better.

define i64 @not_useful_bswap() {
; CHECK-LABEL: not_useful_bswap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzbl var8, %eax
; CHECK-NEXT:    shll $8, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: not_useful_bswap:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl var8(%rip), %eax
; CHECK64-NEXT:    shlq $8, %rax
; CHECK64-NEXT:    retq
  %init = load i8, i8* @var8
  %big = zext i8 %init to i64

  %hishifted = lshr i64 %big, 8
  %loshifted = shl i64 %big, 8

  %notswapped = or i64 %hishifted, %loshifted

  ret i64 %notswapped
}

; Finally, it *is* OK to just mask off the shl if we know that the value is zero
; beyond 16 bits anyway. This is a legitimate bswap.

define i64 @finally_useful_bswap() {
; CHECK-LABEL: finally_useful_bswap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzwl var16, %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    shrl $16, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: finally_useful_bswap:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzwl var16(%rip), %eax
; CHECK64-NEXT:    bswapq %rax
; CHECK64-NEXT:    shrq $48, %rax
; CHECK64-NEXT:    retq
  %init = load i16, i16* @var16
  %big = zext i16 %init to i64

  %hishifted = lshr i64 %big, 8
  %lomasked = and i64 %big, 255
  %loshifted = shl i64 %lomasked, 8

  %swapped = or i64 %hishifted, %loshifted

  ret i64 %swapped
}

; Make sure we don't assert during type legalization promoting a large
; bswap due to the need for a large shift that won't fit in the i8 returned
; from getShiftAmountTy.
define i528 @large_promotion(i528 %A) nounwind {
; CHECK-LABEL: large_promotion:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    subl $44, %esp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    bswapl %ecx
; CHECK-NEXT:    shrdl $16, %ecx, %eax
; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    bswapl %edx
; CHECK-NEXT:    shrdl $16, %edx, %ecx
; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    bswapl %esi
; CHECK-NEXT:    shrdl $16, %esi, %edx
; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    bswapl %edi
; CHECK-NEXT:    shrdl $16, %edi, %esi
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    bswapl %ebx
; CHECK-NEXT:    shrdl $16, %ebx, %edi
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    bswapl %ebp
; CHECK-NEXT:    shrdl $16, %ebp, %ebx
; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    bswapl %ecx
; CHECK-NEXT:    shrdl $16, %ecx, %ebp
; CHECK-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    shrdl $16, %eax, %ecx
; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    bswapl %ecx
; CHECK-NEXT:    shrdl $16, %ecx, %eax
; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    shrdl $16, %eax, %ecx
; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT:    bswapl %ebp
; CHECK-NEXT:    shrdl $16, %ebp, %eax
; CHECK-NEXT:    movl %eax, (%esp) # 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT:    bswapl %ebx
; CHECK-NEXT:    shrdl $16, %ebx, %ebp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    bswapl %esi
; CHECK-NEXT:    shrdl $16, %esi, %ebx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    bswapl %edx
; CHECK-NEXT:    shrdl $16, %edx, %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    bswapl %ecx
; CHECK-NEXT:    shrdl $16, %ecx, %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    bswapl %edi
; CHECK-NEXT:    shrdl $16, %edi, %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %ecx, 60(%eax)
; CHECK-NEXT:    movl %edx, 56(%eax)
; CHECK-NEXT:    movl %esi, 52(%eax)
; CHECK-NEXT:    movl %ebx, 48(%eax)
; CHECK-NEXT:    movl %ebp, 44(%eax)
; CHECK-NEXT:    movl (%esp), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 40(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 36(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 32(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 28(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 24(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 20(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 16(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 12(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 8(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 4(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, (%eax)
; CHECK-NEXT:    shrl $16, %edi
; CHECK-NEXT:    movw %di, 64(%eax)
; CHECK-NEXT:    addl $44, %esp
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    popl %ebx
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl $4
;
; CHECK64-LABEL: large_promotion:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    pushq %rbx
; CHECK64-NEXT:    movq %rdi, %rax
; CHECK64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
; CHECK64-NEXT:    movq {{[0-9]+}}(%rsp), %r11
; CHECK64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
; CHECK64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
; CHECK64-NEXT:    bswapq %r10
; CHECK64-NEXT:    bswapq %rdi
; CHECK64-NEXT:    shrdq $48, %rdi, %r10
; CHECK64-NEXT:    bswapq %r11
; CHECK64-NEXT:    shrdq $48, %r11, %rdi
; CHECK64-NEXT:    bswapq %rbx
; CHECK64-NEXT:    shrdq $48, %rbx, %r11
; CHECK64-NEXT:    bswapq %r9
; CHECK64-NEXT:    shrdq $48, %r9, %rbx
; CHECK64-NEXT:    bswapq %r8
; CHECK64-NEXT:    shrdq $48, %r8, %r9
; CHECK64-NEXT:    bswapq %rcx
; CHECK64-NEXT:    shrdq $48, %rcx, %r8
; CHECK64-NEXT:    bswapq %rdx
; CHECK64-NEXT:    shrdq $48, %rdx, %rcx
; CHECK64-NEXT:    bswapq %rsi
; CHECK64-NEXT:    shrdq $48, %rsi, %rdx
; CHECK64-NEXT:    shrq $48, %rsi
; CHECK64-NEXT:    movq %rdx, 56(%rax)
; CHECK64-NEXT:    movq %rcx, 48(%rax)
; CHECK64-NEXT:    movq %r8, 40(%rax)
; CHECK64-NEXT:    movq %r9, 32(%rax)
; CHECK64-NEXT:    movq %rbx, 24(%rax)
; CHECK64-NEXT:    movq %r11, 16(%rax)
; CHECK64-NEXT:    movq %rdi, 8(%rax)
; CHECK64-NEXT:    movq %r10, (%rax)
; CHECK64-NEXT:    movw %si, 64(%rax)
; CHECK64-NEXT:    popq %rbx
; CHECK64-NEXT:    retq
; The i528 bswap is legalized into per-word bswaps stitched together with
; shrd/shrdq funnel shifts; the test just needs this not to assert.
  %Z = call i528 @llvm.bswap.i528(i528 %A)
  ret i528 %Z
}
declare i528 @llvm.bswap.i528(i528)
