1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+bmi,+tbm < %s | FileCheck %s
3
4target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5target triple = "x86_64-unknown-unknown"
6
7; Stack reload folding tests.
8;
9; By including a nop call with sideeffects we can force a partial register spill of the
10; relevant registers and check that the reload is correctly folded into the instruction.
11
; bextri, 32-bit: calls the llvm.x86.tbm.bextri.u32 intrinsic on %a0 with the
; constant 3841. The expected output spills %edi before the inline asm and then
; uses the stack slot directly as the bextrl source ("4-byte Folded Reload").
12define i32 @stack_fold_bextri_u32(i32 %a0) {
13; CHECK-LABEL: stack_fold_bextri_u32:
14; CHECK:       # %bb.0:
15; CHECK-NEXT:    pushq %rbp
16; CHECK-NEXT:    .cfi_def_cfa_offset 16
17; CHECK-NEXT:    pushq %r15
18; CHECK-NEXT:    .cfi_def_cfa_offset 24
19; CHECK-NEXT:    pushq %r14
20; CHECK-NEXT:    .cfi_def_cfa_offset 32
21; CHECK-NEXT:    pushq %r13
22; CHECK-NEXT:    .cfi_def_cfa_offset 40
23; CHECK-NEXT:    pushq %r12
24; CHECK-NEXT:    .cfi_def_cfa_offset 48
25; CHECK-NEXT:    pushq %rbx
26; CHECK-NEXT:    .cfi_def_cfa_offset 56
27; CHECK-NEXT:    .cfi_offset %rbx, -56
28; CHECK-NEXT:    .cfi_offset %r12, -48
29; CHECK-NEXT:    .cfi_offset %r13, -40
30; CHECK-NEXT:    .cfi_offset %r14, -32
31; CHECK-NEXT:    .cfi_offset %r15, -24
32; CHECK-NEXT:    .cfi_offset %rbp, -16
33; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
34; CHECK-NEXT:    #APP
35; CHECK-NEXT:    nop
36; CHECK-NEXT:    #NO_APP
37; CHECK-NEXT:    bextrl $3841, {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
38; CHECK-NEXT:    # imm = 0xF01
39; CHECK-NEXT:    popq %rbx
40; CHECK-NEXT:    .cfi_def_cfa_offset 48
41; CHECK-NEXT:    popq %r12
42; CHECK-NEXT:    .cfi_def_cfa_offset 40
43; CHECK-NEXT:    popq %r13
44; CHECK-NEXT:    .cfi_def_cfa_offset 32
45; CHECK-NEXT:    popq %r14
46; CHECK-NEXT:    .cfi_def_cfa_offset 24
47; CHECK-NEXT:    popq %r15
48; CHECK-NEXT:    .cfi_def_cfa_offset 16
49; CHECK-NEXT:    popq %rbp
50; CHECK-NEXT:    .cfi_def_cfa_offset 8
51; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
52  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Operation under test; 3841 is printed as 0xF01 in the expected output.
53  %2 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a0, i32 3841)
54  ret i32 %2
55}
; Declaration of the intrinsic called by stack_fold_bextri_u32.
56declare i32 @llvm.x86.tbm.bextri.u32(i32, i32)
57
; bextri, 64-bit: calls the llvm.x86.tbm.bextri.u64 intrinsic on %a0 with the
; constant 3841. The expected output spills %rdi before the inline asm and then
; uses the stack slot directly as the bextrq source ("8-byte Folded Reload").
58define i64 @stack_fold_bextri_u64(i64 %a0) {
59; CHECK-LABEL: stack_fold_bextri_u64:
60; CHECK:       # %bb.0:
61; CHECK-NEXT:    pushq %rbp
62; CHECK-NEXT:    .cfi_def_cfa_offset 16
63; CHECK-NEXT:    pushq %r15
64; CHECK-NEXT:    .cfi_def_cfa_offset 24
65; CHECK-NEXT:    pushq %r14
66; CHECK-NEXT:    .cfi_def_cfa_offset 32
67; CHECK-NEXT:    pushq %r13
68; CHECK-NEXT:    .cfi_def_cfa_offset 40
69; CHECK-NEXT:    pushq %r12
70; CHECK-NEXT:    .cfi_def_cfa_offset 48
71; CHECK-NEXT:    pushq %rbx
72; CHECK-NEXT:    .cfi_def_cfa_offset 56
73; CHECK-NEXT:    .cfi_offset %rbx, -56
74; CHECK-NEXT:    .cfi_offset %r12, -48
75; CHECK-NEXT:    .cfi_offset %r13, -40
76; CHECK-NEXT:    .cfi_offset %r14, -32
77; CHECK-NEXT:    .cfi_offset %r15, -24
78; CHECK-NEXT:    .cfi_offset %rbp, -16
79; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
80; CHECK-NEXT:    #APP
81; CHECK-NEXT:    nop
82; CHECK-NEXT:    #NO_APP
83; CHECK-NEXT:    bextrq $3841, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
84; CHECK-NEXT:    # imm = 0xF01
85; CHECK-NEXT:    popq %rbx
86; CHECK-NEXT:    .cfi_def_cfa_offset 48
87; CHECK-NEXT:    popq %r12
88; CHECK-NEXT:    .cfi_def_cfa_offset 40
89; CHECK-NEXT:    popq %r13
90; CHECK-NEXT:    .cfi_def_cfa_offset 32
91; CHECK-NEXT:    popq %r14
92; CHECK-NEXT:    .cfi_def_cfa_offset 24
93; CHECK-NEXT:    popq %r15
94; CHECK-NEXT:    .cfi_def_cfa_offset 16
95; CHECK-NEXT:    popq %rbp
96; CHECK-NEXT:    .cfi_def_cfa_offset 8
97; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
98  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Operation under test; 3841 is printed as 0xF01 in the expected output.
99  %2 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a0, i64 3841)
100  ret i64 %2
101}
; Declaration of the intrinsic called by stack_fold_bextri_u64.
102declare i64 @llvm.x86.tbm.bextri.u64(i64, i64)
103
; blcfill, 32-bit: the IR computes a0 & (a0 + 1). The expected output is a
; single blcfilll whose source operand is the spilled stack slot
; ("4-byte Folded Reload").
104define i32 @stack_fold_blcfill_u32(i32 %a0) {
105; CHECK-LABEL: stack_fold_blcfill_u32:
106; CHECK:       # %bb.0:
107; CHECK-NEXT:    pushq %rbp
108; CHECK-NEXT:    .cfi_def_cfa_offset 16
109; CHECK-NEXT:    pushq %r15
110; CHECK-NEXT:    .cfi_def_cfa_offset 24
111; CHECK-NEXT:    pushq %r14
112; CHECK-NEXT:    .cfi_def_cfa_offset 32
113; CHECK-NEXT:    pushq %r13
114; CHECK-NEXT:    .cfi_def_cfa_offset 40
115; CHECK-NEXT:    pushq %r12
116; CHECK-NEXT:    .cfi_def_cfa_offset 48
117; CHECK-NEXT:    pushq %rbx
118; CHECK-NEXT:    .cfi_def_cfa_offset 56
119; CHECK-NEXT:    .cfi_offset %rbx, -56
120; CHECK-NEXT:    .cfi_offset %r12, -48
121; CHECK-NEXT:    .cfi_offset %r13, -40
122; CHECK-NEXT:    .cfi_offset %r14, -32
123; CHECK-NEXT:    .cfi_offset %r15, -24
124; CHECK-NEXT:    .cfi_offset %rbp, -16
125; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
126; CHECK-NEXT:    #APP
127; CHECK-NEXT:    nop
128; CHECK-NEXT:    #NO_APP
129; CHECK-NEXT:    blcfilll {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
130; CHECK-NEXT:    popq %rbx
131; CHECK-NEXT:    .cfi_def_cfa_offset 48
132; CHECK-NEXT:    popq %r12
133; CHECK-NEXT:    .cfi_def_cfa_offset 40
134; CHECK-NEXT:    popq %r13
135; CHECK-NEXT:    .cfi_def_cfa_offset 32
136; CHECK-NEXT:    popq %r14
137; CHECK-NEXT:    .cfi_def_cfa_offset 24
138; CHECK-NEXT:    popq %r15
139; CHECK-NEXT:    .cfi_def_cfa_offset 16
140; CHECK-NEXT:    popq %rbp
141; CHECK-NEXT:    .cfi_def_cfa_offset 8
142; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
143  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: a0 & (a0 + 1).
144  %2 = add i32 %a0, 1
145  %3 = and i32 %a0, %2
146  ret i32 %3
147}
148
; blcfill, 64-bit: the IR computes a0 & (a0 + 1). The expected output is a
; single blcfillq whose source operand is the spilled stack slot
; ("8-byte Folded Reload").
149define i64 @stack_fold_blcfill_u64(i64 %a0) {
150; CHECK-LABEL: stack_fold_blcfill_u64:
151; CHECK:       # %bb.0:
152; CHECK-NEXT:    pushq %rbp
153; CHECK-NEXT:    .cfi_def_cfa_offset 16
154; CHECK-NEXT:    pushq %r15
155; CHECK-NEXT:    .cfi_def_cfa_offset 24
156; CHECK-NEXT:    pushq %r14
157; CHECK-NEXT:    .cfi_def_cfa_offset 32
158; CHECK-NEXT:    pushq %r13
159; CHECK-NEXT:    .cfi_def_cfa_offset 40
160; CHECK-NEXT:    pushq %r12
161; CHECK-NEXT:    .cfi_def_cfa_offset 48
162; CHECK-NEXT:    pushq %rbx
163; CHECK-NEXT:    .cfi_def_cfa_offset 56
164; CHECK-NEXT:    .cfi_offset %rbx, -56
165; CHECK-NEXT:    .cfi_offset %r12, -48
166; CHECK-NEXT:    .cfi_offset %r13, -40
167; CHECK-NEXT:    .cfi_offset %r14, -32
168; CHECK-NEXT:    .cfi_offset %r15, -24
169; CHECK-NEXT:    .cfi_offset %rbp, -16
170; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
171; CHECK-NEXT:    #APP
172; CHECK-NEXT:    nop
173; CHECK-NEXT:    #NO_APP
174; CHECK-NEXT:    blcfillq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
175; CHECK-NEXT:    popq %rbx
176; CHECK-NEXT:    .cfi_def_cfa_offset 48
177; CHECK-NEXT:    popq %r12
178; CHECK-NEXT:    .cfi_def_cfa_offset 40
179; CHECK-NEXT:    popq %r13
180; CHECK-NEXT:    .cfi_def_cfa_offset 32
181; CHECK-NEXT:    popq %r14
182; CHECK-NEXT:    .cfi_def_cfa_offset 24
183; CHECK-NEXT:    popq %r15
184; CHECK-NEXT:    .cfi_def_cfa_offset 16
185; CHECK-NEXT:    popq %rbp
186; CHECK-NEXT:    .cfi_def_cfa_offset 8
187; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
188  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: a0 & (a0 + 1).
189  %2 = add i64 %a0, 1
190  %3 = and i64 %a0, %2
191  ret i64 %3
192}
193
; blci, 32-bit: the IR computes a0 | ~(a0 + 1). The expected output is a single
; blcil whose source operand is the spilled stack slot ("4-byte Folded Reload").
194define i32 @stack_fold_blci_u32(i32 %a0) {
195; CHECK-LABEL: stack_fold_blci_u32:
196; CHECK:       # %bb.0:
197; CHECK-NEXT:    pushq %rbp
198; CHECK-NEXT:    .cfi_def_cfa_offset 16
199; CHECK-NEXT:    pushq %r15
200; CHECK-NEXT:    .cfi_def_cfa_offset 24
201; CHECK-NEXT:    pushq %r14
202; CHECK-NEXT:    .cfi_def_cfa_offset 32
203; CHECK-NEXT:    pushq %r13
204; CHECK-NEXT:    .cfi_def_cfa_offset 40
205; CHECK-NEXT:    pushq %r12
206; CHECK-NEXT:    .cfi_def_cfa_offset 48
207; CHECK-NEXT:    pushq %rbx
208; CHECK-NEXT:    .cfi_def_cfa_offset 56
209; CHECK-NEXT:    .cfi_offset %rbx, -56
210; CHECK-NEXT:    .cfi_offset %r12, -48
211; CHECK-NEXT:    .cfi_offset %r13, -40
212; CHECK-NEXT:    .cfi_offset %r14, -32
213; CHECK-NEXT:    .cfi_offset %r15, -24
214; CHECK-NEXT:    .cfi_offset %rbp, -16
215; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
216; CHECK-NEXT:    #APP
217; CHECK-NEXT:    nop
218; CHECK-NEXT:    #NO_APP
219; CHECK-NEXT:    blcil {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
220; CHECK-NEXT:    popq %rbx
221; CHECK-NEXT:    .cfi_def_cfa_offset 48
222; CHECK-NEXT:    popq %r12
223; CHECK-NEXT:    .cfi_def_cfa_offset 40
224; CHECK-NEXT:    popq %r13
225; CHECK-NEXT:    .cfi_def_cfa_offset 32
226; CHECK-NEXT:    popq %r14
227; CHECK-NEXT:    .cfi_def_cfa_offset 24
228; CHECK-NEXT:    popq %r15
229; CHECK-NEXT:    .cfi_def_cfa_offset 16
230; CHECK-NEXT:    popq %rbp
231; CHECK-NEXT:    .cfi_def_cfa_offset 8
232; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
233  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: a0 | ~(a0 + 1); the xor with -1 is the bitwise NOT.
234  %2 = add i32 %a0, 1
235  %3 = xor i32 %2, -1
236  %4 = or i32 %a0, %3
237  ret i32 %4
238}
239
; blci, 64-bit: the IR computes a0 | ~(a0 + 1). The expected output is a single
; blciq whose source operand is the spilled stack slot ("8-byte Folded Reload").
240define i64 @stack_fold_blci_u64(i64 %a0) {
241; CHECK-LABEL: stack_fold_blci_u64:
242; CHECK:       # %bb.0:
243; CHECK-NEXT:    pushq %rbp
244; CHECK-NEXT:    .cfi_def_cfa_offset 16
245; CHECK-NEXT:    pushq %r15
246; CHECK-NEXT:    .cfi_def_cfa_offset 24
247; CHECK-NEXT:    pushq %r14
248; CHECK-NEXT:    .cfi_def_cfa_offset 32
249; CHECK-NEXT:    pushq %r13
250; CHECK-NEXT:    .cfi_def_cfa_offset 40
251; CHECK-NEXT:    pushq %r12
252; CHECK-NEXT:    .cfi_def_cfa_offset 48
253; CHECK-NEXT:    pushq %rbx
254; CHECK-NEXT:    .cfi_def_cfa_offset 56
255; CHECK-NEXT:    .cfi_offset %rbx, -56
256; CHECK-NEXT:    .cfi_offset %r12, -48
257; CHECK-NEXT:    .cfi_offset %r13, -40
258; CHECK-NEXT:    .cfi_offset %r14, -32
259; CHECK-NEXT:    .cfi_offset %r15, -24
260; CHECK-NEXT:    .cfi_offset %rbp, -16
261; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
262; CHECK-NEXT:    #APP
263; CHECK-NEXT:    nop
264; CHECK-NEXT:    #NO_APP
265; CHECK-NEXT:    blciq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
266; CHECK-NEXT:    popq %rbx
267; CHECK-NEXT:    .cfi_def_cfa_offset 48
268; CHECK-NEXT:    popq %r12
269; CHECK-NEXT:    .cfi_def_cfa_offset 40
270; CHECK-NEXT:    popq %r13
271; CHECK-NEXT:    .cfi_def_cfa_offset 32
272; CHECK-NEXT:    popq %r14
273; CHECK-NEXT:    .cfi_def_cfa_offset 24
274; CHECK-NEXT:    popq %r15
275; CHECK-NEXT:    .cfi_def_cfa_offset 16
276; CHECK-NEXT:    popq %rbp
277; CHECK-NEXT:    .cfi_def_cfa_offset 8
278; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
279  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: a0 | ~(a0 + 1); the xor with -1 is the bitwise NOT.
280  %2 = add i64 %a0, 1
281  %3 = xor i64 %2, -1
282  %4 = or i64 %a0, %3
283  ret i64 %4
284}
285
; blcic, 32-bit: the IR computes (a0 + 1) & ~a0. The expected output is a single
; blcicl whose source operand is the spilled stack slot ("4-byte Folded Reload").
286define i32 @stack_fold_blcic_u32(i32 %a0) {
287; CHECK-LABEL: stack_fold_blcic_u32:
288; CHECK:       # %bb.0:
289; CHECK-NEXT:    pushq %rbp
290; CHECK-NEXT:    .cfi_def_cfa_offset 16
291; CHECK-NEXT:    pushq %r15
292; CHECK-NEXT:    .cfi_def_cfa_offset 24
293; CHECK-NEXT:    pushq %r14
294; CHECK-NEXT:    .cfi_def_cfa_offset 32
295; CHECK-NEXT:    pushq %r13
296; CHECK-NEXT:    .cfi_def_cfa_offset 40
297; CHECK-NEXT:    pushq %r12
298; CHECK-NEXT:    .cfi_def_cfa_offset 48
299; CHECK-NEXT:    pushq %rbx
300; CHECK-NEXT:    .cfi_def_cfa_offset 56
301; CHECK-NEXT:    .cfi_offset %rbx, -56
302; CHECK-NEXT:    .cfi_offset %r12, -48
303; CHECK-NEXT:    .cfi_offset %r13, -40
304; CHECK-NEXT:    .cfi_offset %r14, -32
305; CHECK-NEXT:    .cfi_offset %r15, -24
306; CHECK-NEXT:    .cfi_offset %rbp, -16
307; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
308; CHECK-NEXT:    #APP
309; CHECK-NEXT:    nop
310; CHECK-NEXT:    #NO_APP
311; CHECK-NEXT:    blcicl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
312; CHECK-NEXT:    popq %rbx
313; CHECK-NEXT:    .cfi_def_cfa_offset 48
314; CHECK-NEXT:    popq %r12
315; CHECK-NEXT:    .cfi_def_cfa_offset 40
316; CHECK-NEXT:    popq %r13
317; CHECK-NEXT:    .cfi_def_cfa_offset 32
318; CHECK-NEXT:    popq %r14
319; CHECK-NEXT:    .cfi_def_cfa_offset 24
320; CHECK-NEXT:    popq %r15
321; CHECK-NEXT:    .cfi_def_cfa_offset 16
322; CHECK-NEXT:    popq %rbp
323; CHECK-NEXT:    .cfi_def_cfa_offset 8
324; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
325  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: (a0 + 1) & ~a0; the xor with -1 is the bitwise NOT.
326  %2 = add i32 %a0, 1
327  %3 = xor i32 %a0, -1
328  %4 = and i32 %2, %3
329  ret i32 %4
330}
331
; blcic, 64-bit: the IR computes (a0 + 1) & ~a0. The expected output is a single
; blcicq whose source operand is the spilled stack slot ("8-byte Folded Reload").
332define i64 @stack_fold_blcic_u64(i64 %a0) {
333; CHECK-LABEL: stack_fold_blcic_u64:
334; CHECK:       # %bb.0:
335; CHECK-NEXT:    pushq %rbp
336; CHECK-NEXT:    .cfi_def_cfa_offset 16
337; CHECK-NEXT:    pushq %r15
338; CHECK-NEXT:    .cfi_def_cfa_offset 24
339; CHECK-NEXT:    pushq %r14
340; CHECK-NEXT:    .cfi_def_cfa_offset 32
341; CHECK-NEXT:    pushq %r13
342; CHECK-NEXT:    .cfi_def_cfa_offset 40
343; CHECK-NEXT:    pushq %r12
344; CHECK-NEXT:    .cfi_def_cfa_offset 48
345; CHECK-NEXT:    pushq %rbx
346; CHECK-NEXT:    .cfi_def_cfa_offset 56
347; CHECK-NEXT:    .cfi_offset %rbx, -56
348; CHECK-NEXT:    .cfi_offset %r12, -48
349; CHECK-NEXT:    .cfi_offset %r13, -40
350; CHECK-NEXT:    .cfi_offset %r14, -32
351; CHECK-NEXT:    .cfi_offset %r15, -24
352; CHECK-NEXT:    .cfi_offset %rbp, -16
353; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
354; CHECK-NEXT:    #APP
355; CHECK-NEXT:    nop
356; CHECK-NEXT:    #NO_APP
357; CHECK-NEXT:    blcicq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
358; CHECK-NEXT:    popq %rbx
359; CHECK-NEXT:    .cfi_def_cfa_offset 48
360; CHECK-NEXT:    popq %r12
361; CHECK-NEXT:    .cfi_def_cfa_offset 40
362; CHECK-NEXT:    popq %r13
363; CHECK-NEXT:    .cfi_def_cfa_offset 32
364; CHECK-NEXT:    popq %r14
365; CHECK-NEXT:    .cfi_def_cfa_offset 24
366; CHECK-NEXT:    popq %r15
367; CHECK-NEXT:    .cfi_def_cfa_offset 16
368; CHECK-NEXT:    popq %rbp
369; CHECK-NEXT:    .cfi_def_cfa_offset 8
370; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
371  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: (a0 + 1) & ~a0; the xor with -1 is the bitwise NOT.
372  %2 = add i64 %a0, 1
373  %3 = xor i64 %a0, -1
374  %4 = and i64 %2, %3
375  ret i64 %4
376}
377
; blcmsk, 32-bit: the IR computes a0 ^ (a0 + 1). The expected output is a single
; blcmskl whose source operand is the spilled stack slot ("4-byte Folded Reload").
378define i32 @stack_fold_blcmsk_u32(i32 %a0) {
379; CHECK-LABEL: stack_fold_blcmsk_u32:
380; CHECK:       # %bb.0:
381; CHECK-NEXT:    pushq %rbp
382; CHECK-NEXT:    .cfi_def_cfa_offset 16
383; CHECK-NEXT:    pushq %r15
384; CHECK-NEXT:    .cfi_def_cfa_offset 24
385; CHECK-NEXT:    pushq %r14
386; CHECK-NEXT:    .cfi_def_cfa_offset 32
387; CHECK-NEXT:    pushq %r13
388; CHECK-NEXT:    .cfi_def_cfa_offset 40
389; CHECK-NEXT:    pushq %r12
390; CHECK-NEXT:    .cfi_def_cfa_offset 48
391; CHECK-NEXT:    pushq %rbx
392; CHECK-NEXT:    .cfi_def_cfa_offset 56
393; CHECK-NEXT:    .cfi_offset %rbx, -56
394; CHECK-NEXT:    .cfi_offset %r12, -48
395; CHECK-NEXT:    .cfi_offset %r13, -40
396; CHECK-NEXT:    .cfi_offset %r14, -32
397; CHECK-NEXT:    .cfi_offset %r15, -24
398; CHECK-NEXT:    .cfi_offset %rbp, -16
399; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
400; CHECK-NEXT:    #APP
401; CHECK-NEXT:    nop
402; CHECK-NEXT:    #NO_APP
403; CHECK-NEXT:    blcmskl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
404; CHECK-NEXT:    popq %rbx
405; CHECK-NEXT:    .cfi_def_cfa_offset 48
406; CHECK-NEXT:    popq %r12
407; CHECK-NEXT:    .cfi_def_cfa_offset 40
408; CHECK-NEXT:    popq %r13
409; CHECK-NEXT:    .cfi_def_cfa_offset 32
410; CHECK-NEXT:    popq %r14
411; CHECK-NEXT:    .cfi_def_cfa_offset 24
412; CHECK-NEXT:    popq %r15
413; CHECK-NEXT:    .cfi_def_cfa_offset 16
414; CHECK-NEXT:    popq %rbp
415; CHECK-NEXT:    .cfi_def_cfa_offset 8
416; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
417  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: a0 ^ (a0 + 1).
418  %2 = add i32 %a0, 1
419  %3 = xor i32 %a0, %2
420  ret i32 %3
421}
422
; blcmsk, 64-bit: the IR computes a0 ^ (a0 + 1). The expected output is a single
; blcmskq whose source operand is the spilled stack slot ("8-byte Folded Reload").
423define i64 @stack_fold_blcmsk_u64(i64 %a0) {
424; CHECK-LABEL: stack_fold_blcmsk_u64:
425; CHECK:       # %bb.0:
426; CHECK-NEXT:    pushq %rbp
427; CHECK-NEXT:    .cfi_def_cfa_offset 16
428; CHECK-NEXT:    pushq %r15
429; CHECK-NEXT:    .cfi_def_cfa_offset 24
430; CHECK-NEXT:    pushq %r14
431; CHECK-NEXT:    .cfi_def_cfa_offset 32
432; CHECK-NEXT:    pushq %r13
433; CHECK-NEXT:    .cfi_def_cfa_offset 40
434; CHECK-NEXT:    pushq %r12
435; CHECK-NEXT:    .cfi_def_cfa_offset 48
436; CHECK-NEXT:    pushq %rbx
437; CHECK-NEXT:    .cfi_def_cfa_offset 56
438; CHECK-NEXT:    .cfi_offset %rbx, -56
439; CHECK-NEXT:    .cfi_offset %r12, -48
440; CHECK-NEXT:    .cfi_offset %r13, -40
441; CHECK-NEXT:    .cfi_offset %r14, -32
442; CHECK-NEXT:    .cfi_offset %r15, -24
443; CHECK-NEXT:    .cfi_offset %rbp, -16
444; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
445; CHECK-NEXT:    #APP
446; CHECK-NEXT:    nop
447; CHECK-NEXT:    #NO_APP
448; CHECK-NEXT:    blcmskq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
449; CHECK-NEXT:    popq %rbx
450; CHECK-NEXT:    .cfi_def_cfa_offset 48
451; CHECK-NEXT:    popq %r12
452; CHECK-NEXT:    .cfi_def_cfa_offset 40
453; CHECK-NEXT:    popq %r13
454; CHECK-NEXT:    .cfi_def_cfa_offset 32
455; CHECK-NEXT:    popq %r14
456; CHECK-NEXT:    .cfi_def_cfa_offset 24
457; CHECK-NEXT:    popq %r15
458; CHECK-NEXT:    .cfi_def_cfa_offset 16
459; CHECK-NEXT:    popq %rbp
460; CHECK-NEXT:    .cfi_def_cfa_offset 8
461; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
462  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: a0 ^ (a0 + 1).
463  %2 = add i64 %a0, 1
464  %3 = xor i64 %a0, %2
465  ret i64 %3
466}
467
; blcs, 32-bit: the IR computes a0 | (a0 + 1). The expected output is a single
; blcsl whose source operand is the spilled stack slot ("4-byte Folded Reload").
468define i32 @stack_fold_blcs_u32(i32 %a0) {
469; CHECK-LABEL: stack_fold_blcs_u32:
470; CHECK:       # %bb.0:
471; CHECK-NEXT:    pushq %rbp
472; CHECK-NEXT:    .cfi_def_cfa_offset 16
473; CHECK-NEXT:    pushq %r15
474; CHECK-NEXT:    .cfi_def_cfa_offset 24
475; CHECK-NEXT:    pushq %r14
476; CHECK-NEXT:    .cfi_def_cfa_offset 32
477; CHECK-NEXT:    pushq %r13
478; CHECK-NEXT:    .cfi_def_cfa_offset 40
479; CHECK-NEXT:    pushq %r12
480; CHECK-NEXT:    .cfi_def_cfa_offset 48
481; CHECK-NEXT:    pushq %rbx
482; CHECK-NEXT:    .cfi_def_cfa_offset 56
483; CHECK-NEXT:    .cfi_offset %rbx, -56
484; CHECK-NEXT:    .cfi_offset %r12, -48
485; CHECK-NEXT:    .cfi_offset %r13, -40
486; CHECK-NEXT:    .cfi_offset %r14, -32
487; CHECK-NEXT:    .cfi_offset %r15, -24
488; CHECK-NEXT:    .cfi_offset %rbp, -16
489; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
490; CHECK-NEXT:    #APP
491; CHECK-NEXT:    nop
492; CHECK-NEXT:    #NO_APP
493; CHECK-NEXT:    blcsl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
494; CHECK-NEXT:    popq %rbx
495; CHECK-NEXT:    .cfi_def_cfa_offset 48
496; CHECK-NEXT:    popq %r12
497; CHECK-NEXT:    .cfi_def_cfa_offset 40
498; CHECK-NEXT:    popq %r13
499; CHECK-NEXT:    .cfi_def_cfa_offset 32
500; CHECK-NEXT:    popq %r14
501; CHECK-NEXT:    .cfi_def_cfa_offset 24
502; CHECK-NEXT:    popq %r15
503; CHECK-NEXT:    .cfi_def_cfa_offset 16
504; CHECK-NEXT:    popq %rbp
505; CHECK-NEXT:    .cfi_def_cfa_offset 8
506; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
507  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: a0 | (a0 + 1).
508  %2 = add i32 %a0, 1
509  %3 = or i32 %a0, %2
510  ret i32 %3
511}
512
; blcs, 64-bit: the IR computes a0 | (a0 + 1). The expected output is a single
; blcsq whose source operand is the spilled stack slot ("8-byte Folded Reload").
513define i64 @stack_fold_blcs_u64(i64 %a0) {
514; CHECK-LABEL: stack_fold_blcs_u64:
515; CHECK:       # %bb.0:
516; CHECK-NEXT:    pushq %rbp
517; CHECK-NEXT:    .cfi_def_cfa_offset 16
518; CHECK-NEXT:    pushq %r15
519; CHECK-NEXT:    .cfi_def_cfa_offset 24
520; CHECK-NEXT:    pushq %r14
521; CHECK-NEXT:    .cfi_def_cfa_offset 32
522; CHECK-NEXT:    pushq %r13
523; CHECK-NEXT:    .cfi_def_cfa_offset 40
524; CHECK-NEXT:    pushq %r12
525; CHECK-NEXT:    .cfi_def_cfa_offset 48
526; CHECK-NEXT:    pushq %rbx
527; CHECK-NEXT:    .cfi_def_cfa_offset 56
528; CHECK-NEXT:    .cfi_offset %rbx, -56
529; CHECK-NEXT:    .cfi_offset %r12, -48
530; CHECK-NEXT:    .cfi_offset %r13, -40
531; CHECK-NEXT:    .cfi_offset %r14, -32
532; CHECK-NEXT:    .cfi_offset %r15, -24
533; CHECK-NEXT:    .cfi_offset %rbp, -16
534; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
535; CHECK-NEXT:    #APP
536; CHECK-NEXT:    nop
537; CHECK-NEXT:    #NO_APP
538; CHECK-NEXT:    blcsq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
539; CHECK-NEXT:    popq %rbx
540; CHECK-NEXT:    .cfi_def_cfa_offset 48
541; CHECK-NEXT:    popq %r12
542; CHECK-NEXT:    .cfi_def_cfa_offset 40
543; CHECK-NEXT:    popq %r13
544; CHECK-NEXT:    .cfi_def_cfa_offset 32
545; CHECK-NEXT:    popq %r14
546; CHECK-NEXT:    .cfi_def_cfa_offset 24
547; CHECK-NEXT:    popq %r15
548; CHECK-NEXT:    .cfi_def_cfa_offset 16
549; CHECK-NEXT:    popq %rbp
550; CHECK-NEXT:    .cfi_def_cfa_offset 8
551; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
552  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: a0 | (a0 + 1).
553  %2 = add i64 %a0, 1
554  %3 = or i64 %a0, %2
555  ret i64 %3
556}
557
; blsfill, 32-bit: the IR computes a0 | (a0 - 1). The expected output is a
; single blsfilll whose source operand is the spilled stack slot
; ("4-byte Folded Reload").
558define i32 @stack_fold_blsfill_u32(i32 %a0) {
559; CHECK-LABEL: stack_fold_blsfill_u32:
560; CHECK:       # %bb.0:
561; CHECK-NEXT:    pushq %rbp
562; CHECK-NEXT:    .cfi_def_cfa_offset 16
563; CHECK-NEXT:    pushq %r15
564; CHECK-NEXT:    .cfi_def_cfa_offset 24
565; CHECK-NEXT:    pushq %r14
566; CHECK-NEXT:    .cfi_def_cfa_offset 32
567; CHECK-NEXT:    pushq %r13
568; CHECK-NEXT:    .cfi_def_cfa_offset 40
569; CHECK-NEXT:    pushq %r12
570; CHECK-NEXT:    .cfi_def_cfa_offset 48
571; CHECK-NEXT:    pushq %rbx
572; CHECK-NEXT:    .cfi_def_cfa_offset 56
573; CHECK-NEXT:    .cfi_offset %rbx, -56
574; CHECK-NEXT:    .cfi_offset %r12, -48
575; CHECK-NEXT:    .cfi_offset %r13, -40
576; CHECK-NEXT:    .cfi_offset %r14, -32
577; CHECK-NEXT:    .cfi_offset %r15, -24
578; CHECK-NEXT:    .cfi_offset %rbp, -16
579; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
580; CHECK-NEXT:    #APP
581; CHECK-NEXT:    nop
582; CHECK-NEXT:    #NO_APP
583; CHECK-NEXT:    blsfilll {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
584; CHECK-NEXT:    popq %rbx
585; CHECK-NEXT:    .cfi_def_cfa_offset 48
586; CHECK-NEXT:    popq %r12
587; CHECK-NEXT:    .cfi_def_cfa_offset 40
588; CHECK-NEXT:    popq %r13
589; CHECK-NEXT:    .cfi_def_cfa_offset 32
590; CHECK-NEXT:    popq %r14
591; CHECK-NEXT:    .cfi_def_cfa_offset 24
592; CHECK-NEXT:    popq %r15
593; CHECK-NEXT:    .cfi_def_cfa_offset 16
594; CHECK-NEXT:    popq %rbp
595; CHECK-NEXT:    .cfi_def_cfa_offset 8
596; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
597  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: a0 | (a0 - 1).
598  %2 = sub i32 %a0, 1
599  %3 = or i32 %a0, %2
600  ret i32 %3
601}
602
; blsfill, 64-bit: the IR computes a0 | (a0 - 1). The expected output is a
; single blsfillq whose source operand is the spilled stack slot
; ("8-byte Folded Reload").
603define i64 @stack_fold_blsfill_u64(i64 %a0) {
604; CHECK-LABEL: stack_fold_blsfill_u64:
605; CHECK:       # %bb.0:
606; CHECK-NEXT:    pushq %rbp
607; CHECK-NEXT:    .cfi_def_cfa_offset 16
608; CHECK-NEXT:    pushq %r15
609; CHECK-NEXT:    .cfi_def_cfa_offset 24
610; CHECK-NEXT:    pushq %r14
611; CHECK-NEXT:    .cfi_def_cfa_offset 32
612; CHECK-NEXT:    pushq %r13
613; CHECK-NEXT:    .cfi_def_cfa_offset 40
614; CHECK-NEXT:    pushq %r12
615; CHECK-NEXT:    .cfi_def_cfa_offset 48
616; CHECK-NEXT:    pushq %rbx
617; CHECK-NEXT:    .cfi_def_cfa_offset 56
618; CHECK-NEXT:    .cfi_offset %rbx, -56
619; CHECK-NEXT:    .cfi_offset %r12, -48
620; CHECK-NEXT:    .cfi_offset %r13, -40
621; CHECK-NEXT:    .cfi_offset %r14, -32
622; CHECK-NEXT:    .cfi_offset %r15, -24
623; CHECK-NEXT:    .cfi_offset %rbp, -16
624; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
625; CHECK-NEXT:    #APP
626; CHECK-NEXT:    nop
627; CHECK-NEXT:    #NO_APP
628; CHECK-NEXT:    blsfillq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
629; CHECK-NEXT:    popq %rbx
630; CHECK-NEXT:    .cfi_def_cfa_offset 48
631; CHECK-NEXT:    popq %r12
632; CHECK-NEXT:    .cfi_def_cfa_offset 40
633; CHECK-NEXT:    popq %r13
634; CHECK-NEXT:    .cfi_def_cfa_offset 32
635; CHECK-NEXT:    popq %r14
636; CHECK-NEXT:    .cfi_def_cfa_offset 24
637; CHECK-NEXT:    popq %r15
638; CHECK-NEXT:    .cfi_def_cfa_offset 16
639; CHECK-NEXT:    popq %rbp
640; CHECK-NEXT:    .cfi_def_cfa_offset 8
641; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
642  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: a0 | (a0 - 1).
643  %2 = sub i64 %a0, 1
644  %3 = or i64 %a0, %2
645  ret i64 %3
646}
647
; blsic, 32-bit: the IR computes (a0 - 1) | ~a0. The expected output is a single
; blsicl whose source operand is the spilled stack slot ("4-byte Folded Reload").
648define i32 @stack_fold_blsic_u32(i32 %a0) {
649; CHECK-LABEL: stack_fold_blsic_u32:
650; CHECK:       # %bb.0:
651; CHECK-NEXT:    pushq %rbp
652; CHECK-NEXT:    .cfi_def_cfa_offset 16
653; CHECK-NEXT:    pushq %r15
654; CHECK-NEXT:    .cfi_def_cfa_offset 24
655; CHECK-NEXT:    pushq %r14
656; CHECK-NEXT:    .cfi_def_cfa_offset 32
657; CHECK-NEXT:    pushq %r13
658; CHECK-NEXT:    .cfi_def_cfa_offset 40
659; CHECK-NEXT:    pushq %r12
660; CHECK-NEXT:    .cfi_def_cfa_offset 48
661; CHECK-NEXT:    pushq %rbx
662; CHECK-NEXT:    .cfi_def_cfa_offset 56
663; CHECK-NEXT:    .cfi_offset %rbx, -56
664; CHECK-NEXT:    .cfi_offset %r12, -48
665; CHECK-NEXT:    .cfi_offset %r13, -40
666; CHECK-NEXT:    .cfi_offset %r14, -32
667; CHECK-NEXT:    .cfi_offset %r15, -24
668; CHECK-NEXT:    .cfi_offset %rbp, -16
669; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
670; CHECK-NEXT:    #APP
671; CHECK-NEXT:    nop
672; CHECK-NEXT:    #NO_APP
673; CHECK-NEXT:    blsicl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
674; CHECK-NEXT:    popq %rbx
675; CHECK-NEXT:    .cfi_def_cfa_offset 48
676; CHECK-NEXT:    popq %r12
677; CHECK-NEXT:    .cfi_def_cfa_offset 40
678; CHECK-NEXT:    popq %r13
679; CHECK-NEXT:    .cfi_def_cfa_offset 32
680; CHECK-NEXT:    popq %r14
681; CHECK-NEXT:    .cfi_def_cfa_offset 24
682; CHECK-NEXT:    popq %r15
683; CHECK-NEXT:    .cfi_def_cfa_offset 16
684; CHECK-NEXT:    popq %rbp
685; CHECK-NEXT:    .cfi_def_cfa_offset 8
686; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
687  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: (a0 - 1) | ~a0; the xor with -1 is the bitwise NOT.
688  %2 = sub i32 %a0, 1
689  %3 = xor i32 %a0, -1
690  %4 = or i32 %2, %3
691  ret i32 %4
692}
693
; blsic, 64-bit: the IR computes (a0 - 1) | ~a0. The expected output is a single
; blsicq whose source operand is the spilled stack slot ("8-byte Folded Reload").
694define i64 @stack_fold_blsic_u64(i64 %a0) {
695; CHECK-LABEL: stack_fold_blsic_u64:
696; CHECK:       # %bb.0:
697; CHECK-NEXT:    pushq %rbp
698; CHECK-NEXT:    .cfi_def_cfa_offset 16
699; CHECK-NEXT:    pushq %r15
700; CHECK-NEXT:    .cfi_def_cfa_offset 24
701; CHECK-NEXT:    pushq %r14
702; CHECK-NEXT:    .cfi_def_cfa_offset 32
703; CHECK-NEXT:    pushq %r13
704; CHECK-NEXT:    .cfi_def_cfa_offset 40
705; CHECK-NEXT:    pushq %r12
706; CHECK-NEXT:    .cfi_def_cfa_offset 48
707; CHECK-NEXT:    pushq %rbx
708; CHECK-NEXT:    .cfi_def_cfa_offset 56
709; CHECK-NEXT:    .cfi_offset %rbx, -56
710; CHECK-NEXT:    .cfi_offset %r12, -48
711; CHECK-NEXT:    .cfi_offset %r13, -40
712; CHECK-NEXT:    .cfi_offset %r14, -32
713; CHECK-NEXT:    .cfi_offset %r15, -24
714; CHECK-NEXT:    .cfi_offset %rbp, -16
715; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
716; CHECK-NEXT:    #APP
717; CHECK-NEXT:    nop
718; CHECK-NEXT:    #NO_APP
719; CHECK-NEXT:    blsicq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
720; CHECK-NEXT:    popq %rbx
721; CHECK-NEXT:    .cfi_def_cfa_offset 48
722; CHECK-NEXT:    popq %r12
723; CHECK-NEXT:    .cfi_def_cfa_offset 40
724; CHECK-NEXT:    popq %r13
725; CHECK-NEXT:    .cfi_def_cfa_offset 32
726; CHECK-NEXT:    popq %r14
727; CHECK-NEXT:    .cfi_def_cfa_offset 24
728; CHECK-NEXT:    popq %r15
729; CHECK-NEXT:    .cfi_def_cfa_offset 16
730; CHECK-NEXT:    popq %rbp
731; CHECK-NEXT:    .cfi_def_cfa_offset 8
732; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
733  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: (a0 - 1) | ~a0; the xor with -1 is the bitwise NOT.
734  %2 = sub i64 %a0, 1
735  %3 = xor i64 %a0, -1
736  %4 = or i64 %2, %3
737  ret i64 %4
738}
739
; t1mskc, 32-bit: the IR computes (a0 + 1) | ~a0. The expected output is a
; single t1mskcl whose source operand is the spilled stack slot
; ("4-byte Folded Reload").
740define i32 @stack_fold_t1mskc_u32(i32 %a0) {
741; CHECK-LABEL: stack_fold_t1mskc_u32:
742; CHECK:       # %bb.0:
743; CHECK-NEXT:    pushq %rbp
744; CHECK-NEXT:    .cfi_def_cfa_offset 16
745; CHECK-NEXT:    pushq %r15
746; CHECK-NEXT:    .cfi_def_cfa_offset 24
747; CHECK-NEXT:    pushq %r14
748; CHECK-NEXT:    .cfi_def_cfa_offset 32
749; CHECK-NEXT:    pushq %r13
750; CHECK-NEXT:    .cfi_def_cfa_offset 40
751; CHECK-NEXT:    pushq %r12
752; CHECK-NEXT:    .cfi_def_cfa_offset 48
753; CHECK-NEXT:    pushq %rbx
754; CHECK-NEXT:    .cfi_def_cfa_offset 56
755; CHECK-NEXT:    .cfi_offset %rbx, -56
756; CHECK-NEXT:    .cfi_offset %r12, -48
757; CHECK-NEXT:    .cfi_offset %r13, -40
758; CHECK-NEXT:    .cfi_offset %r14, -32
759; CHECK-NEXT:    .cfi_offset %r15, -24
760; CHECK-NEXT:    .cfi_offset %rbp, -16
761; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
762; CHECK-NEXT:    #APP
763; CHECK-NEXT:    nop
764; CHECK-NEXT:    #NO_APP
765; CHECK-NEXT:    t1mskcl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
766; CHECK-NEXT:    popq %rbx
767; CHECK-NEXT:    .cfi_def_cfa_offset 48
768; CHECK-NEXT:    popq %r12
769; CHECK-NEXT:    .cfi_def_cfa_offset 40
770; CHECK-NEXT:    popq %r13
771; CHECK-NEXT:    .cfi_def_cfa_offset 32
772; CHECK-NEXT:    popq %r14
773; CHECK-NEXT:    .cfi_def_cfa_offset 24
774; CHECK-NEXT:    popq %r15
775; CHECK-NEXT:    .cfi_def_cfa_offset 16
776; CHECK-NEXT:    popq %rbp
777; CHECK-NEXT:    .cfi_def_cfa_offset 8
778; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
779  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: (a0 + 1) | ~a0; the xor with -1 is the bitwise NOT.
780  %2 = add i32 %a0, 1
781  %3 = xor i32 %a0, -1
782  %4 = or i32 %2, %3
783  ret i32 %4
784}
785
; t1mskc, 64-bit: the IR computes (a0 + 1) | ~a0. The expected output is a
; single t1mskcq whose source operand is the spilled stack slot
; ("8-byte Folded Reload").
786define i64 @stack_fold_t1mskc_u64(i64 %a0) {
787; CHECK-LABEL: stack_fold_t1mskc_u64:
788; CHECK:       # %bb.0:
789; CHECK-NEXT:    pushq %rbp
790; CHECK-NEXT:    .cfi_def_cfa_offset 16
791; CHECK-NEXT:    pushq %r15
792; CHECK-NEXT:    .cfi_def_cfa_offset 24
793; CHECK-NEXT:    pushq %r14
794; CHECK-NEXT:    .cfi_def_cfa_offset 32
795; CHECK-NEXT:    pushq %r13
796; CHECK-NEXT:    .cfi_def_cfa_offset 40
797; CHECK-NEXT:    pushq %r12
798; CHECK-NEXT:    .cfi_def_cfa_offset 48
799; CHECK-NEXT:    pushq %rbx
800; CHECK-NEXT:    .cfi_def_cfa_offset 56
801; CHECK-NEXT:    .cfi_offset %rbx, -56
802; CHECK-NEXT:    .cfi_offset %r12, -48
803; CHECK-NEXT:    .cfi_offset %r13, -40
804; CHECK-NEXT:    .cfi_offset %r14, -32
805; CHECK-NEXT:    .cfi_offset %r15, -24
806; CHECK-NEXT:    .cfi_offset %rbp, -16
807; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
808; CHECK-NEXT:    #APP
809; CHECK-NEXT:    nop
810; CHECK-NEXT:    #NO_APP
811; CHECK-NEXT:    t1mskcq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
812; CHECK-NEXT:    popq %rbx
813; CHECK-NEXT:    .cfi_def_cfa_offset 48
814; CHECK-NEXT:    popq %r12
815; CHECK-NEXT:    .cfi_def_cfa_offset 40
816; CHECK-NEXT:    popq %r13
817; CHECK-NEXT:    .cfi_def_cfa_offset 32
818; CHECK-NEXT:    popq %r14
819; CHECK-NEXT:    .cfi_def_cfa_offset 24
820; CHECK-NEXT:    popq %r15
821; CHECK-NEXT:    .cfi_def_cfa_offset 16
822; CHECK-NEXT:    popq %rbp
823; CHECK-NEXT:    .cfi_def_cfa_offset 8
824; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
825  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: (a0 + 1) | ~a0; the xor with -1 is the bitwise NOT.
826  %2 = add i64 %a0, 1
827  %3 = xor i64 %a0, -1
828  %4 = or i64 %2, %3
829  ret i64 %4
830}
831
; tzmsk, 32-bit: the IR computes (a0 - 1) & ~a0. The expected output is a single
; tzmskl whose source operand is the spilled stack slot ("4-byte Folded Reload").
832define i32 @stack_fold_tzmsk_u32(i32 %a0) {
833; CHECK-LABEL: stack_fold_tzmsk_u32:
834; CHECK:       # %bb.0:
835; CHECK-NEXT:    pushq %rbp
836; CHECK-NEXT:    .cfi_def_cfa_offset 16
837; CHECK-NEXT:    pushq %r15
838; CHECK-NEXT:    .cfi_def_cfa_offset 24
839; CHECK-NEXT:    pushq %r14
840; CHECK-NEXT:    .cfi_def_cfa_offset 32
841; CHECK-NEXT:    pushq %r13
842; CHECK-NEXT:    .cfi_def_cfa_offset 40
843; CHECK-NEXT:    pushq %r12
844; CHECK-NEXT:    .cfi_def_cfa_offset 48
845; CHECK-NEXT:    pushq %rbx
846; CHECK-NEXT:    .cfi_def_cfa_offset 56
847; CHECK-NEXT:    .cfi_offset %rbx, -56
848; CHECK-NEXT:    .cfi_offset %r12, -48
849; CHECK-NEXT:    .cfi_offset %r13, -40
850; CHECK-NEXT:    .cfi_offset %r14, -32
851; CHECK-NEXT:    .cfi_offset %r15, -24
852; CHECK-NEXT:    .cfi_offset %rbp, -16
853; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
854; CHECK-NEXT:    #APP
855; CHECK-NEXT:    nop
856; CHECK-NEXT:    #NO_APP
857; CHECK-NEXT:    tzmskl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
858; CHECK-NEXT:    popq %rbx
859; CHECK-NEXT:    .cfi_def_cfa_offset 48
860; CHECK-NEXT:    popq %r12
861; CHECK-NEXT:    .cfi_def_cfa_offset 40
862; CHECK-NEXT:    popq %r13
863; CHECK-NEXT:    .cfi_def_cfa_offset 32
864; CHECK-NEXT:    popq %r14
865; CHECK-NEXT:    .cfi_def_cfa_offset 24
866; CHECK-NEXT:    popq %r15
867; CHECK-NEXT:    .cfi_def_cfa_offset 16
868; CHECK-NEXT:    popq %rbp
869; CHECK-NEXT:    .cfi_def_cfa_offset 8
870; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
871  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: (a0 - 1) & ~a0; the xor with -1 is the bitwise NOT.
872  %2 = sub i32 %a0, 1
873  %3 = xor i32 %a0, -1
874  %4 = and i32 %2, %3
875  ret i32 %4
876}
877
; tzmsk, 64-bit: the IR computes (a0 - 1) & ~a0. The expected output is a single
; tzmskq whose source operand is the spilled stack slot ("8-byte Folded Reload").
878define i64 @stack_fold_tzmsk_u64(i64 %a0) {
879; CHECK-LABEL: stack_fold_tzmsk_u64:
880; CHECK:       # %bb.0:
881; CHECK-NEXT:    pushq %rbp
882; CHECK-NEXT:    .cfi_def_cfa_offset 16
883; CHECK-NEXT:    pushq %r15
884; CHECK-NEXT:    .cfi_def_cfa_offset 24
885; CHECK-NEXT:    pushq %r14
886; CHECK-NEXT:    .cfi_def_cfa_offset 32
887; CHECK-NEXT:    pushq %r13
888; CHECK-NEXT:    .cfi_def_cfa_offset 40
889; CHECK-NEXT:    pushq %r12
890; CHECK-NEXT:    .cfi_def_cfa_offset 48
891; CHECK-NEXT:    pushq %rbx
892; CHECK-NEXT:    .cfi_def_cfa_offset 56
893; CHECK-NEXT:    .cfi_offset %rbx, -56
894; CHECK-NEXT:    .cfi_offset %r12, -48
895; CHECK-NEXT:    .cfi_offset %r13, -40
896; CHECK-NEXT:    .cfi_offset %r14, -32
897; CHECK-NEXT:    .cfi_offset %r15, -24
898; CHECK-NEXT:    .cfi_offset %rbp, -16
899; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
900; CHECK-NEXT:    #APP
901; CHECK-NEXT:    nop
902; CHECK-NEXT:    #NO_APP
903; CHECK-NEXT:    tzmskq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
904; CHECK-NEXT:    popq %rbx
905; CHECK-NEXT:    .cfi_def_cfa_offset 48
906; CHECK-NEXT:    popq %r12
907; CHECK-NEXT:    .cfi_def_cfa_offset 40
908; CHECK-NEXT:    popq %r13
909; CHECK-NEXT:    .cfi_def_cfa_offset 32
910; CHECK-NEXT:    popq %r14
911; CHECK-NEXT:    .cfi_def_cfa_offset 24
912; CHECK-NEXT:    popq %r15
913; CHECK-NEXT:    .cfi_def_cfa_offset 16
914; CHECK-NEXT:    popq %rbp
915; CHECK-NEXT:    .cfi_def_cfa_offset 8
916; CHECK-NEXT:    retq
; The nop asm names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as clobbers, so
; %a0 has to be kept on the stack across it. The asm result %1 is never used.
917  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; Pattern under test: (a0 - 1) & ~a0; the xor with -1 is the bitwise NOT.
918  %2 = sub i64 %a0, 1
919  %3 = xor i64 %a0, -1
920  %4 = and i64 %2, %3
921  ret i64 %4
922}
923