; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefixes=X64,SSE
; RUN: llc < %s -mtriple=x86_64-linux -mattr=avx | FileCheck %s --check-prefixes=X64,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-linux -mattr=avx2 | FileCheck %s --check-prefixes=X64,AVX,AVX2
; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86

declare i8 @llvm.smax.i8(i8, i8)
declare i16 @llvm.smax.i16(i16, i16)
declare i24 @llvm.smax.i24(i24, i24)
declare i32 @llvm.smax.i32(i32, i32)
declare i64 @llvm.smax.i64(i64, i64)
declare i128 @llvm.smax.i128(i128, i128)

declare <1 x i32> @llvm.smax.v1i32(<1 x i32>, <1 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <3 x i32> @llvm.smax.v3i32(<3 x i32>, <3 x i32>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)

declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)

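; Narrow scalar types: i8/i16 smax is promoted to a 32-bit compare plus cmovg;
; the "# kill" lines just note the implicit truncation back to the narrow
; result register.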
define i8 @test_i8(i8 %a, i8 %b) nounwind {
; X64-LABEL: test_i8:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    cmpb %al, %dil
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: test_i8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpb %al, %cl
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %r = call i8 @llvm.smax.i8(i8 %a, i8 %b)
  ret i8 %r
}

define i16 @test_i16(i16 %a, i16 %b) nounwind {
; X64-LABEL: test_i16:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    cmpw %ax, %di
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: test_i16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %r = call i16 @llvm.smax.i16(i16 %a, i16 %b)
  ret i16 %r
}

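; i24 has no native register class, so both operands are sign-extended in
; place with shll $8 / sarl $8 before the 32-bit compare.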
define i24 @test_i24(i24 %a, i24 %b) nounwind {
; X64-LABEL: test_i24:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $8, %esi
; X64-NEXT:    sarl $8, %esi
; X64-NEXT:    shll $8, %eax
; X64-NEXT:    sarl $8, %eax
; X64-NEXT:    cmpl %esi, %eax
; X64-NEXT:    cmovlel %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: test_i24:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $8, %ecx
; X86-NEXT:    sarl $8, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    sarl $8, %eax
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    cmovlel %ecx, %eax
; X86-NEXT:    retl
  %r = call i24 @llvm.smax.i24(i24 %a, i24 %b)
  ret i24 %r
}

define i32 @test_i32(i32 %a, i32 %b) nounwind {
; X64-LABEL: test_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    cmpl %esi, %edi
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: test_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    retl
  %r = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  ret i32 %r
}

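; On i686 the i64 max is expanded into a word-pair compare: an unsigned
; compare of the low halves (cmova) plus a signed compare of the high halves
; (cmovg), with cmove selecting the low-half result when the high halves are
; equal.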
define i64 @test_i64(i64 %a, i64 %b) nounwind {
; X64-LABEL: test_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    cmpq %rsi, %rdi
; X64-NEXT:    cmovgq %rdi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: test_i64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    movl %eax, %edi
; X86-NEXT:    cmoval %ecx, %edi
; X86-NEXT:    cmpl %edx, %esi
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    cmovel %edi, %eax
; X86-NEXT:    cmovgl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl
  %r = call i64 @llvm.smax.i64(i64 %a, i64 %b)
  ret i64 %r
}

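; i128 uses the same pair-wise expansion with 64-bit registers on x86-64; on
; i686 it is split into four words, the signed high-part compare is formed
; with sbbl, and the result is returned through a hidden sret pointer.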
define i128 @test_i128(i128 %a, i128 %b) nounwind {
; X64-LABEL: test_i128:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdx, %rax
; X64-NEXT:    cmpq %rdx, %rdi
; X64-NEXT:    cmovaq %rdi, %rdx
; X64-NEXT:    cmpq %rcx, %rsi
; X64-NEXT:    cmovgq %rdi, %rax
; X64-NEXT:    cmoveq %rdx, %rax
; X64-NEXT:    cmovgq %rsi, %rcx
; X64-NEXT:    movq %rcx, %rdx
; X64-NEXT:    retq
;
; X86-LABEL: test_i128:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpl %ebx, %edx
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    cmoval %edx, %eax
; X86-NEXT:    cmpl %esi, %ecx
; X86-NEXT:    movl %ebx, %ebp
; X86-NEXT:    cmoval %edx, %ebp
; X86-NEXT:    cmovel %eax, %ebp
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    cmoval %ecx, %eax
; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    sbbl %edi, %ecx
; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %esi
; X86-NEXT:    cmovll {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl %edi, %ecx
; X86-NEXT:    xorl %edx, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    xorl %eax, %edi
; X86-NEXT:    orl %ecx, %edi
; X86-NEXT:    cmovel %ebp, %ebx
; X86-NEXT:    cmovel (%esp), %esi # 4-byte Folded Reload
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    cmpl %eax, %edi
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    cmoval %edi, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    cmpl %edx, %ebp
; X86-NEXT:    cmovgl %edi, %eax
; X86-NEXT:    cmovel %ecx, %eax
; X86-NEXT:    cmovgl %ebp, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %edx, 12(%ecx)
; X86-NEXT:    movl %eax, 8(%ecx)
; X86-NEXT:    movl %esi, 4(%ecx)
; X86-NEXT:    movl %ebx, (%ecx)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %r = call i128 @llvm.smax.i128(i128 %a, i128 %b)
  ret i128 %r
}

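; <1 x i32> is scalarized and lowers exactly like the plain i32 case.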
define <1 x i32> @test_v1i32(<1 x i32> %a, <1 x i32> %b) nounwind {
; X64-LABEL: test_v1i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    cmpl %esi, %edi
; X64-NEXT:    cmovgl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: test_v1i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpl %eax, %ecx
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    retl
  %r = call <1 x i32> @llvm.smax.v1i32(<1 x i32> %a, <1 x i32> %b)
  ret <1 x i32> %r
}

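; Baseline SSE2 has no pmaxsd (it was added in SSE4.1), so <N x i32> smax is
; lowered to a pcmpgtd mask plus a pand/pandn/por blend; AVX targets use
; vpmaxsd. The i686 RUN line enables no SSE, so the vector lanes are
; scalarized to cmp/cmovg.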
define <2 x i32> @test_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
; SSE-LABEL: test_v2i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm2
; SSE-NEXT:    por %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: test_v2i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    cmpl %eax, %esi
; X86-NEXT:    cmovgl %esi, %eax
; X86-NEXT:    cmpl %edx, %ecx
; X86-NEXT:    cmovgl %ecx, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %r = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i32> %r
}

define <3 x i32> @test_v3i32(<3 x i32> %a, <3 x i32> %b) nounwind {
; SSE-LABEL: test_v3i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm2
; SSE-NEXT:    por %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v3i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: test_v3i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    cmpl %eax, %ebx
; X86-NEXT:    cmovgl %ebx, %eax
; X86-NEXT:    cmpl %edx, %edi
; X86-NEXT:    cmovgl %edi, %edx
; X86-NEXT:    cmpl %ecx, %esi
; X86-NEXT:    cmovgl %esi, %ecx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
  %r = call <3 x i32> @llvm.smax.v3i32(<3 x i32> %a, <3 x i32> %b)
  ret <3 x i32> %r
}

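; From <4 x i32> up, the i686 result no longer fits in return registers, so it
; is stored through a hidden sret pointer (left in %eax) and the callee pops
; that pointer with retl $4.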
define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE-LABEL: test_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm2
; SSE-NEXT:    por %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: test_v4i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %edi, %eax
; X86-NEXT:    cmovgl %eax, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %esi, %eax
; X86-NEXT:    cmovgl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %edx, %eax
; X86-NEXT:    cmovgl %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
  %r = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %r
}

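; AVX1 has no 256-bit integer min/max, so the <8 x i32> case is split into two
; 128-bit vpmaxsd ops and reassembled with vinsertf128; AVX2 handles it with a
; single vpmaxsd on the ymm registers.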
define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; SSE-LABEL: test_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm4
; SSE-NEXT:    pcmpgtd %xmm2, %xmm4
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    pandn %xmm2, %xmm4
; SSE-NEXT:    por %xmm0, %xmm4
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    pcmpgtd %xmm3, %xmm2
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pandn %xmm3, %xmm2
; SSE-NEXT:    por %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm4, %xmm0
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; X86-LABEL: test_v8i32:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %ebp, %eax
; X86-NEXT:    cmovgl %eax, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %ebx, %eax
; X86-NEXT:    cmovgl %eax, %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %edi, %eax
; X86-NEXT:    cmovgl %eax, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %esi, %eax
; X86-NEXT:    cmovgl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %edx, %eax
; X86-NEXT:    cmovgl %eax, %edx
; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    cmpl %eax, %edx
; X86-NEXT:    cmovgl %edx, %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %eax, 28(%edx)
; X86-NEXT:    movl %ecx, 24(%edx)
; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
; X86-NEXT:    movl %eax, 20(%edx)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT:    movl %eax, 16(%edx)
; X86-NEXT:    movl %esi, 12(%edx)
; X86-NEXT:    movl %edi, 8(%edx)
; X86-NEXT:    movl %ebx, 4(%edx)
; X86-NEXT:    movl %ebp, (%edx)
; X86-NEXT:    movl %edx, %eax
; X86-NEXT:    addl $8, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %r = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %a, <8 x i32> %b)
  ret <8 x i32> %r
}

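; pmaxsw is part of baseline SSE2, so <8 x i16> smax is a single instruction
; on both the SSE and AVX runs.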
define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE-LABEL: test_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaxsw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: test_v8i16:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %bp, %ax
; X86-NEXT:    cmovgl %eax, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %bx, %ax
; X86-NEXT:    cmovgl %eax, %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %di, %ax
; X86-NEXT:    cmovgl %eax, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %si, %ax
; X86-NEXT:    cmovgl %eax, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %dx, %ax
; X86-NEXT:    cmovgl %eax, %edx
; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %cx, %ax
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpw %dx, %ax
; X86-NEXT:    cmovgl %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    cmpw %ax, %cx
; X86-NEXT:    cmovgl %ecx, %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movw %ax, 14(%ecx)
; X86-NEXT:    movw %dx, 12(%ecx)
; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
; X86-NEXT:    movw %ax, 10(%ecx)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT:    movw %ax, 8(%ecx)
; X86-NEXT:    movw %si, 6(%ecx)
; X86-NEXT:    movw %di, 4(%ecx)
; X86-NEXT:    movw %bx, 2(%ecx)
; X86-NEXT:    movw %bp, (%ecx)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $8, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %r = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %r
}

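; pmaxsb only arrived with SSE4.1, so the plain SSE2 run falls back to a
; pcmpgtb mask plus pand/pandn/por blend, while AVX uses vpmaxsb. The i686
; expansion scalarizes all sixteen lanes, hence the large spill area.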
define <16 x i8> @test_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE-LABEL: test_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm2
; SSE-NEXT:    por %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-LABEL: test_v16i8:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    subl $40, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %bl, %al
; X86-NEXT:    cmovgl %eax, %ebx
; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %dl, %al
; X86-NEXT:    cmovgl %eax, %edx
; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl %ecx, %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %bl, %al
; X86-NEXT:    cmovgl %eax, %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %dl, %al
; X86-NEXT:    cmovgl %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpb %cl, %al
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb %cl, 15(%eax)
; X86-NEXT:    movb %dl, 14(%eax)
; X86-NEXT:    movb %bl, 13(%eax)
; X86-NEXT:    movl %esi, %ecx
; X86-NEXT:    movb %cl, 12(%eax)
; X86-NEXT:    movl %edi, %ecx
; X86-NEXT:    movb %cl, 11(%eax)
; X86-NEXT:    movl %ebp, %ecx
; X86-NEXT:    movb %cl, 10(%eax)
; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 9(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 8(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 7(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 6(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 5(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 4(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 3(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 2(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, 1(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movb %cl, (%eax)
; X86-NEXT:    addl $40, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %r = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %r
}