1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
3; RUN:   | FileCheck %s -check-prefix=RV32I
4; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \
5; RUN:   | FileCheck %s -check-prefix=RV32IB
6; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbp -verify-machineinstrs < %s \
7; RUN:   | FileCheck %s -check-prefix=RV32IBP
8
; gorc1_i32: ORs %a with its 1-bit left shift masked by 0xAAAAAAAA and its
; 1-bit right shift masked by 0x55555555. The assertions below expect a
; shift/mask/or sequence on the base ISA and a single orc.p when
; +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value
; (likewise %and1 / %shr).
9define i32 @gorc1_i32(i32 %a) nounwind {
10; RV32I-LABEL: gorc1_i32:
11; RV32I:       # %bb.0:
12; RV32I-NEXT:    slli a1, a0, 1
13; RV32I-NEXT:    lui a2, 699051
14; RV32I-NEXT:    addi a2, a2, -1366
15; RV32I-NEXT:    and a1, a1, a2
16; RV32I-NEXT:    srli a2, a0, 1
17; RV32I-NEXT:    lui a3, 349525
18; RV32I-NEXT:    addi a3, a3, 1365
19; RV32I-NEXT:    and a2, a2, a3
20; RV32I-NEXT:    or a0, a2, a0
21; RV32I-NEXT:    or a0, a0, a1
22; RV32I-NEXT:    ret
23;
24; RV32IB-LABEL: gorc1_i32:
25; RV32IB:       # %bb.0:
26; RV32IB-NEXT:    orc.p a0, a0
27; RV32IB-NEXT:    ret
28;
29; RV32IBP-LABEL: gorc1_i32:
30; RV32IBP:       # %bb.0:
31; RV32IBP-NEXT:    orc.p a0, a0
32; RV32IBP-NEXT:    ret
33  %and = shl i32 %a, 1
34  %shl = and i32 %and, -1431655766
35  %and1 = lshr i32 %a, 1
36  %shr = and i32 %and1, 1431655765
37  %or = or i32 %shr, %a
38  %or2 = or i32 %or, %shl
39  ret i32 %or2
40}
41
; gorc1_i64: i64 form of the 1-bit OR-combine pattern. Masks are
; 0xAAAAAAAAAAAAAAAA for the left shift and 0x5555555555555555 for the
; right shift. The assertions expect one orc.p per 32-bit register (a0 and
; a1) when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
42define i64 @gorc1_i64(i64 %a) nounwind {
43; RV32I-LABEL: gorc1_i64:
44; RV32I:       # %bb.0:
45; RV32I-NEXT:    slli a2, a0, 1
46; RV32I-NEXT:    slli a3, a1, 1
47; RV32I-NEXT:    lui a4, 699051
48; RV32I-NEXT:    addi a4, a4, -1366
49; RV32I-NEXT:    and a6, a3, a4
50; RV32I-NEXT:    and a2, a2, a4
51; RV32I-NEXT:    srli a4, a1, 1
52; RV32I-NEXT:    srli a5, a0, 1
53; RV32I-NEXT:    lui a3, 349525
54; RV32I-NEXT:    addi a3, a3, 1365
55; RV32I-NEXT:    and a5, a5, a3
56; RV32I-NEXT:    and a3, a4, a3
57; RV32I-NEXT:    or a1, a3, a1
58; RV32I-NEXT:    or a0, a5, a0
59; RV32I-NEXT:    or a0, a0, a2
60; RV32I-NEXT:    or a1, a1, a6
61; RV32I-NEXT:    ret
62;
63; RV32IB-LABEL: gorc1_i64:
64; RV32IB:       # %bb.0:
65; RV32IB-NEXT:    orc.p a0, a0
66; RV32IB-NEXT:    orc.p a1, a1
67; RV32IB-NEXT:    ret
68;
69; RV32IBP-LABEL: gorc1_i64:
70; RV32IBP:       # %bb.0:
71; RV32IBP-NEXT:    orc.p a0, a0
72; RV32IBP-NEXT:    orc.p a1, a1
73; RV32IBP-NEXT:    ret
74  %and = shl i64 %a, 1
75  %shl = and i64 %and, -6148914691236517206
76  %and1 = lshr i64 %a, 1
77  %shr = and i64 %and1, 6148914691236517205
78  %or = or i64 %shr, %a
79  %or2 = or i64 %or, %shl
80  ret i64 %or2
81}
82
; gorc2_i32: ORs %a with its 2-bit left shift masked by 0xCCCCCCCC and its
; 2-bit right shift masked by 0x33333333. The assertions expect a single
; orc2.n when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
83define i32 @gorc2_i32(i32 %a) nounwind {
84; RV32I-LABEL: gorc2_i32:
85; RV32I:       # %bb.0:
86; RV32I-NEXT:    slli a1, a0, 2
87; RV32I-NEXT:    lui a2, 838861
88; RV32I-NEXT:    addi a2, a2, -820
89; RV32I-NEXT:    and a1, a1, a2
90; RV32I-NEXT:    srli a2, a0, 2
91; RV32I-NEXT:    lui a3, 209715
92; RV32I-NEXT:    addi a3, a3, 819
93; RV32I-NEXT:    and a2, a2, a3
94; RV32I-NEXT:    or a0, a2, a0
95; RV32I-NEXT:    or a0, a0, a1
96; RV32I-NEXT:    ret
97;
98; RV32IB-LABEL: gorc2_i32:
99; RV32IB:       # %bb.0:
100; RV32IB-NEXT:    orc2.n a0, a0
101; RV32IB-NEXT:    ret
102;
103; RV32IBP-LABEL: gorc2_i32:
104; RV32IBP:       # %bb.0:
105; RV32IBP-NEXT:    orc2.n a0, a0
106; RV32IBP-NEXT:    ret
107  %and = shl i32 %a, 2
108  %shl = and i32 %and, -858993460
109  %and1 = lshr i32 %a, 2
110  %shr = and i32 %and1, 858993459
111  %or = or i32 %shr, %a
112  %or2 = or i32 %or, %shl
113  ret i32 %or2
114}
115
; gorc2_i64: i64 form of the 2-bit OR-combine pattern. Masks are
; 0xCCCCCCCCCCCCCCCC for the left shift and 0x3333333333333333 for the
; right shift. The assertions expect one orc2.n per 32-bit register (a0
; and a1) when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
116define i64 @gorc2_i64(i64 %a) nounwind {
117; RV32I-LABEL: gorc2_i64:
118; RV32I:       # %bb.0:
119; RV32I-NEXT:    slli a2, a0, 2
120; RV32I-NEXT:    slli a3, a1, 2
121; RV32I-NEXT:    lui a4, 838861
122; RV32I-NEXT:    addi a4, a4, -820
123; RV32I-NEXT:    and a6, a3, a4
124; RV32I-NEXT:    and a2, a2, a4
125; RV32I-NEXT:    srli a4, a1, 2
126; RV32I-NEXT:    srli a5, a0, 2
127; RV32I-NEXT:    lui a3, 209715
128; RV32I-NEXT:    addi a3, a3, 819
129; RV32I-NEXT:    and a5, a5, a3
130; RV32I-NEXT:    and a3, a4, a3
131; RV32I-NEXT:    or a1, a3, a1
132; RV32I-NEXT:    or a0, a5, a0
133; RV32I-NEXT:    or a0, a0, a2
134; RV32I-NEXT:    or a1, a1, a6
135; RV32I-NEXT:    ret
136;
137; RV32IB-LABEL: gorc2_i64:
138; RV32IB:       # %bb.0:
139; RV32IB-NEXT:    orc2.n a0, a0
140; RV32IB-NEXT:    orc2.n a1, a1
141; RV32IB-NEXT:    ret
142;
143; RV32IBP-LABEL: gorc2_i64:
144; RV32IBP:       # %bb.0:
145; RV32IBP-NEXT:    orc2.n a0, a0
146; RV32IBP-NEXT:    orc2.n a1, a1
147; RV32IBP-NEXT:    ret
148  %and = shl i64 %a, 2
149  %shl = and i64 %and, -3689348814741910324
150  %and1 = lshr i64 %a, 2
151  %shr = and i64 %and1, 3689348814741910323
152  %or = or i64 %shr, %a
153  %or2 = or i64 %or, %shl
154  ret i64 %or2
155}
156
; gorc4_i32: ORs %a with its 4-bit left shift masked by 0xF0F0F0F0 and its
; 4-bit right shift masked by 0x0F0F0F0F. The assertions expect a single
; orc4.b when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
157define i32 @gorc4_i32(i32 %a) nounwind {
158; RV32I-LABEL: gorc4_i32:
159; RV32I:       # %bb.0:
160; RV32I-NEXT:    slli a1, a0, 4
161; RV32I-NEXT:    lui a2, 986895
162; RV32I-NEXT:    addi a2, a2, 240
163; RV32I-NEXT:    and a1, a1, a2
164; RV32I-NEXT:    srli a2, a0, 4
165; RV32I-NEXT:    lui a3, 61681
166; RV32I-NEXT:    addi a3, a3, -241
167; RV32I-NEXT:    and a2, a2, a3
168; RV32I-NEXT:    or a0, a2, a0
169; RV32I-NEXT:    or a0, a0, a1
170; RV32I-NEXT:    ret
171;
172; RV32IB-LABEL: gorc4_i32:
173; RV32IB:       # %bb.0:
174; RV32IB-NEXT:    orc4.b a0, a0
175; RV32IB-NEXT:    ret
176;
177; RV32IBP-LABEL: gorc4_i32:
178; RV32IBP:       # %bb.0:
179; RV32IBP-NEXT:    orc4.b a0, a0
180; RV32IBP-NEXT:    ret
181  %and = shl i32 %a, 4
182  %shl = and i32 %and, -252645136
183  %and1 = lshr i32 %a, 4
184  %shr = and i32 %and1, 252645135
185  %or = or i32 %shr, %a
186  %or2 = or i32 %or, %shl
187  ret i32 %or2
188}
189
; gorc4_i64: i64 form of the 4-bit OR-combine pattern. Masks are
; 0xF0F0F0F0F0F0F0F0 for the left shift and 0x0F0F0F0F0F0F0F0F for the
; right shift. The assertions expect one orc4.b per 32-bit register (a0
; and a1) when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
190define i64 @gorc4_i64(i64 %a) nounwind {
191; RV32I-LABEL: gorc4_i64:
192; RV32I:       # %bb.0:
193; RV32I-NEXT:    slli a2, a0, 4
194; RV32I-NEXT:    slli a3, a1, 4
195; RV32I-NEXT:    lui a4, 986895
196; RV32I-NEXT:    addi a4, a4, 240
197; RV32I-NEXT:    and a6, a3, a4
198; RV32I-NEXT:    and a2, a2, a4
199; RV32I-NEXT:    srli a4, a1, 4
200; RV32I-NEXT:    srli a5, a0, 4
201; RV32I-NEXT:    lui a3, 61681
202; RV32I-NEXT:    addi a3, a3, -241
203; RV32I-NEXT:    and a5, a5, a3
204; RV32I-NEXT:    and a3, a4, a3
205; RV32I-NEXT:    or a1, a3, a1
206; RV32I-NEXT:    or a0, a5, a0
207; RV32I-NEXT:    or a0, a0, a2
208; RV32I-NEXT:    or a1, a1, a6
209; RV32I-NEXT:    ret
210;
211; RV32IB-LABEL: gorc4_i64:
212; RV32IB:       # %bb.0:
213; RV32IB-NEXT:    orc4.b a0, a0
214; RV32IB-NEXT:    orc4.b a1, a1
215; RV32IB-NEXT:    ret
216;
217; RV32IBP-LABEL: gorc4_i64:
218; RV32IBP:       # %bb.0:
219; RV32IBP-NEXT:    orc4.b a0, a0
220; RV32IBP-NEXT:    orc4.b a1, a1
221; RV32IBP-NEXT:    ret
222  %and = shl i64 %a, 4
223  %shl = and i64 %and, -1085102592571150096
224  %and1 = lshr i64 %a, 4
225  %shr = and i64 %and1, 1085102592571150095
226  %or = or i64 %shr, %a
227  %or2 = or i64 %or, %shl
228  ret i64 %or2
229}
230
; gorc8_i32: ORs %a with its 8-bit left shift masked by 0xFF00FF00 and its
; 8-bit right shift masked by 0x00FF00FF. The assertions expect a single
; orc8.h when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
231define i32 @gorc8_i32(i32 %a) nounwind {
232; RV32I-LABEL: gorc8_i32:
233; RV32I:       # %bb.0:
234; RV32I-NEXT:    slli a1, a0, 8
235; RV32I-NEXT:    lui a2, 1044496
236; RV32I-NEXT:    addi a2, a2, -256
237; RV32I-NEXT:    and a1, a1, a2
238; RV32I-NEXT:    srli a2, a0, 8
239; RV32I-NEXT:    lui a3, 4080
240; RV32I-NEXT:    addi a3, a3, 255
241; RV32I-NEXT:    and a2, a2, a3
242; RV32I-NEXT:    or a0, a2, a0
243; RV32I-NEXT:    or a0, a0, a1
244; RV32I-NEXT:    ret
245;
246; RV32IB-LABEL: gorc8_i32:
247; RV32IB:       # %bb.0:
248; RV32IB-NEXT:    orc8.h a0, a0
249; RV32IB-NEXT:    ret
250;
251; RV32IBP-LABEL: gorc8_i32:
252; RV32IBP:       # %bb.0:
253; RV32IBP-NEXT:    orc8.h a0, a0
254; RV32IBP-NEXT:    ret
255  %and = shl i32 %a, 8
256  %shl = and i32 %and, -16711936
257  %and1 = lshr i32 %a, 8
258  %shr = and i32 %and1, 16711935
259  %or = or i32 %shr, %a
260  %or2 = or i32 %or, %shl
261  ret i32 %or2
262}
263
; gorc8_i64: i64 form of the 8-bit OR-combine pattern. Masks are
; 0xFF00FF00FF00FF00 for the left shift and 0x00FF00FF00FF00FF for the
; right shift. The assertions expect one orc8.h per 32-bit register (a0
; and a1) when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
264define i64 @gorc8_i64(i64 %a) nounwind {
265; RV32I-LABEL: gorc8_i64:
266; RV32I:       # %bb.0:
267; RV32I-NEXT:    slli a2, a0, 8
268; RV32I-NEXT:    slli a3, a1, 8
269; RV32I-NEXT:    lui a4, 1044496
270; RV32I-NEXT:    addi a4, a4, -256
271; RV32I-NEXT:    and a6, a3, a4
272; RV32I-NEXT:    and a2, a2, a4
273; RV32I-NEXT:    srli a4, a1, 8
274; RV32I-NEXT:    srli a5, a0, 8
275; RV32I-NEXT:    lui a3, 4080
276; RV32I-NEXT:    addi a3, a3, 255
277; RV32I-NEXT:    and a5, a5, a3
278; RV32I-NEXT:    and a3, a4, a3
279; RV32I-NEXT:    or a1, a3, a1
280; RV32I-NEXT:    or a0, a5, a0
281; RV32I-NEXT:    or a0, a0, a2
282; RV32I-NEXT:    or a1, a1, a6
283; RV32I-NEXT:    ret
284;
285; RV32IB-LABEL: gorc8_i64:
286; RV32IB:       # %bb.0:
287; RV32IB-NEXT:    orc8.h a0, a0
288; RV32IB-NEXT:    orc8.h a1, a1
289; RV32IB-NEXT:    ret
290;
291; RV32IBP-LABEL: gorc8_i64:
292; RV32IBP:       # %bb.0:
293; RV32IBP-NEXT:    orc8.h a0, a0
294; RV32IBP-NEXT:    orc8.h a1, a1
295; RV32IBP-NEXT:    ret
296  %and = shl i64 %a, 8
297  %shl = and i64 %and, -71777214294589696
298  %and1 = lshr i64 %a, 8
299  %shr = and i64 %and1, 71777214294589695
300  %or = or i64 %shr, %a
301  %or2 = or i64 %or, %shl
302  ret i64 %or2
303}
304
; gorc16_i32: ORs %a with its 16-bit left shift and its 16-bit right shift.
; Unlike the narrower gorc tests, the IR applies no masks here. The
; assertions expect a single orc16 when +experimental-b or
; +experimental-zbp is enabled.
305define i32 @gorc16_i32(i32 %a) nounwind {
306; RV32I-LABEL: gorc16_i32:
307; RV32I:       # %bb.0:
308; RV32I-NEXT:    slli a1, a0, 16
309; RV32I-NEXT:    srli a2, a0, 16
310; RV32I-NEXT:    or a0, a2, a0
311; RV32I-NEXT:    or a0, a0, a1
312; RV32I-NEXT:    ret
313;
314; RV32IB-LABEL: gorc16_i32:
315; RV32IB:       # %bb.0:
316; RV32IB-NEXT:    orc16 a0, a0
317; RV32IB-NEXT:    ret
318;
319; RV32IBP-LABEL: gorc16_i32:
320; RV32IBP:       # %bb.0:
321; RV32IBP-NEXT:    orc16 a0, a0
322; RV32IBP-NEXT:    ret
323  %shl = shl i32 %a, 16
324  %shr = lshr i32 %a, 16
325  %or = or i32 %shr, %a
326  %or2 = or i32 %or, %shl
327  ret i32 %or2
328}
329
; gorc16_i64: i64 form of the 16-bit OR-combine pattern. Masks are
; 0xFFFF0000FFFF0000 for the left shift and 0x0000FFFF0000FFFF for the
; right shift. The assertions expect one orc16 per 32-bit register (a0 and
; a1) when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
330define i64 @gorc16_i64(i64 %a) nounwind {
331; RV32I-LABEL: gorc16_i64:
332; RV32I:       # %bb.0:
333; RV32I-NEXT:    slli a2, a1, 16
334; RV32I-NEXT:    slli a3, a0, 16
335; RV32I-NEXT:    srli a4, a0, 16
336; RV32I-NEXT:    srli a5, a1, 16
337; RV32I-NEXT:    or a1, a5, a1
338; RV32I-NEXT:    or a0, a4, a0
339; RV32I-NEXT:    or a0, a0, a3
340; RV32I-NEXT:    or a1, a1, a2
341; RV32I-NEXT:    ret
342;
343; RV32IB-LABEL: gorc16_i64:
344; RV32IB:       # %bb.0:
345; RV32IB-NEXT:    orc16 a0, a0
346; RV32IB-NEXT:    orc16 a1, a1
347; RV32IB-NEXT:    ret
348;
349; RV32IBP-LABEL: gorc16_i64:
350; RV32IBP:       # %bb.0:
351; RV32IBP-NEXT:    orc16 a0, a0
352; RV32IBP-NEXT:    orc16 a1, a1
353; RV32IBP-NEXT:    ret
354  %and = shl i64 %a, 16
355  %shl = and i64 %and, -281470681808896
356  %and1 = lshr i64 %a, 16
357  %shr = and i64 %and1, 281470681808895
358  %or = or i64 %shr, %a
359  %or2 = or i64 %or, %shl
360  ret i64 %or2
361}
362
; grev1_i32: ORs the 1-bit left shift of %a masked by 0xAAAAAAAA with the
; 1-bit right shift masked by 0x55555555. Unlike the gorc tests, %a itself
; is not ORed in. The assertions expect a single rev.p when
; +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
363define i32 @grev1_i32(i32 %a) nounwind {
364; RV32I-LABEL: grev1_i32:
365; RV32I:       # %bb.0:
366; RV32I-NEXT:    slli a1, a0, 1
367; RV32I-NEXT:    lui a2, 699051
368; RV32I-NEXT:    addi a2, a2, -1366
369; RV32I-NEXT:    and a1, a1, a2
370; RV32I-NEXT:    srli a0, a0, 1
371; RV32I-NEXT:    lui a2, 349525
372; RV32I-NEXT:    addi a2, a2, 1365
373; RV32I-NEXT:    and a0, a0, a2
374; RV32I-NEXT:    or a0, a1, a0
375; RV32I-NEXT:    ret
376;
377; RV32IB-LABEL: grev1_i32:
378; RV32IB:       # %bb.0:
379; RV32IB-NEXT:    rev.p a0, a0
380; RV32IB-NEXT:    ret
381;
382; RV32IBP-LABEL: grev1_i32:
383; RV32IBP:       # %bb.0:
384; RV32IBP-NEXT:    rev.p a0, a0
385; RV32IBP-NEXT:    ret
386  %and = shl i32 %a, 1
387  %shl = and i32 %and, -1431655766
388  %and1 = lshr i32 %a, 1
389  %shr = and i32 %and1, 1431655765
390  %or = or i32 %shl, %shr
391  ret i32 %or
392}
393
; grev1_i64: i64 form of the 1-bit swap pattern. Masks are
; 0xAAAAAAAAAAAAAAAA for the left shift and 0x5555555555555555 for the
; right shift. The assertions expect one rev.p per 32-bit register (a0 and
; a1) when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
394define i64 @grev1_i64(i64 %a) nounwind {
395; RV32I-LABEL: grev1_i64:
396; RV32I:       # %bb.0:
397; RV32I-NEXT:    slli a2, a0, 1
398; RV32I-NEXT:    slli a3, a1, 1
399; RV32I-NEXT:    lui a4, 699051
400; RV32I-NEXT:    addi a4, a4, -1366
401; RV32I-NEXT:    and a3, a3, a4
402; RV32I-NEXT:    and a2, a2, a4
403; RV32I-NEXT:    srli a0, a0, 1
404; RV32I-NEXT:    srli a1, a1, 1
405; RV32I-NEXT:    lui a4, 349525
406; RV32I-NEXT:    addi a4, a4, 1365
407; RV32I-NEXT:    and a1, a1, a4
408; RV32I-NEXT:    and a0, a0, a4
409; RV32I-NEXT:    or a0, a2, a0
410; RV32I-NEXT:    or a1, a3, a1
411; RV32I-NEXT:    ret
412;
413; RV32IB-LABEL: grev1_i64:
414; RV32IB:       # %bb.0:
415; RV32IB-NEXT:    rev.p a0, a0
416; RV32IB-NEXT:    rev.p a1, a1
417; RV32IB-NEXT:    ret
418;
419; RV32IBP-LABEL: grev1_i64:
420; RV32IBP:       # %bb.0:
421; RV32IBP-NEXT:    rev.p a0, a0
422; RV32IBP-NEXT:    rev.p a1, a1
423; RV32IBP-NEXT:    ret
424  %and = shl i64 %a, 1
425  %shl = and i64 %and, -6148914691236517206
426  %and1 = lshr i64 %a, 1
427  %shr = and i64 %and1, 6148914691236517205
428  %or = or i64 %shl, %shr
429  ret i64 %or
430}
431
; grev2_i32: ORs the 2-bit left shift of %a masked by 0xCCCCCCCC with the
; 2-bit right shift masked by 0x33333333. The assertions expect a single
; rev2.n when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
432define i32 @grev2_i32(i32 %a) nounwind {
433; RV32I-LABEL: grev2_i32:
434; RV32I:       # %bb.0:
435; RV32I-NEXT:    slli a1, a0, 2
436; RV32I-NEXT:    lui a2, 838861
437; RV32I-NEXT:    addi a2, a2, -820
438; RV32I-NEXT:    and a1, a1, a2
439; RV32I-NEXT:    srli a0, a0, 2
440; RV32I-NEXT:    lui a2, 209715
441; RV32I-NEXT:    addi a2, a2, 819
442; RV32I-NEXT:    and a0, a0, a2
443; RV32I-NEXT:    or a0, a1, a0
444; RV32I-NEXT:    ret
445;
446; RV32IB-LABEL: grev2_i32:
447; RV32IB:       # %bb.0:
448; RV32IB-NEXT:    rev2.n a0, a0
449; RV32IB-NEXT:    ret
450;
451; RV32IBP-LABEL: grev2_i32:
452; RV32IBP:       # %bb.0:
453; RV32IBP-NEXT:    rev2.n a0, a0
454; RV32IBP-NEXT:    ret
455  %and = shl i32 %a, 2
456  %shl = and i32 %and, -858993460
457  %and1 = lshr i32 %a, 2
458  %shr = and i32 %and1, 858993459
459  %or = or i32 %shl, %shr
460  ret i32 %or
461}
462
; grev2_i64: i64 form of the 2-bit swap pattern. Masks are
; 0xCCCCCCCCCCCCCCCC for the left shift and 0x3333333333333333 for the
; right shift. The assertions expect one rev2.n per 32-bit register (a0
; and a1) when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
463define i64 @grev2_i64(i64 %a) nounwind {
464; RV32I-LABEL: grev2_i64:
465; RV32I:       # %bb.0:
466; RV32I-NEXT:    slli a2, a0, 2
467; RV32I-NEXT:    slli a3, a1, 2
468; RV32I-NEXT:    lui a4, 838861
469; RV32I-NEXT:    addi a4, a4, -820
470; RV32I-NEXT:    and a3, a3, a4
471; RV32I-NEXT:    and a2, a2, a4
472; RV32I-NEXT:    srli a0, a0, 2
473; RV32I-NEXT:    srli a1, a1, 2
474; RV32I-NEXT:    lui a4, 209715
475; RV32I-NEXT:    addi a4, a4, 819
476; RV32I-NEXT:    and a1, a1, a4
477; RV32I-NEXT:    and a0, a0, a4
478; RV32I-NEXT:    or a0, a2, a0
479; RV32I-NEXT:    or a1, a3, a1
480; RV32I-NEXT:    ret
481;
482; RV32IB-LABEL: grev2_i64:
483; RV32IB:       # %bb.0:
484; RV32IB-NEXT:    rev2.n a0, a0
485; RV32IB-NEXT:    rev2.n a1, a1
486; RV32IB-NEXT:    ret
487;
488; RV32IBP-LABEL: grev2_i64:
489; RV32IBP:       # %bb.0:
490; RV32IBP-NEXT:    rev2.n a0, a0
491; RV32IBP-NEXT:    rev2.n a1, a1
492; RV32IBP-NEXT:    ret
493  %and = shl i64 %a, 2
494  %shl = and i64 %and, -3689348814741910324
495  %and1 = lshr i64 %a, 2
496  %shr = and i64 %and1, 3689348814741910323
497  %or = or i64 %shl, %shr
498  ret i64 %or
499}
500
; grev4_i32: ORs the 4-bit left shift of %a masked by 0xF0F0F0F0 with the
; 4-bit right shift masked by 0x0F0F0F0F. The assertions expect a single
; rev4.b when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
501define i32 @grev4_i32(i32 %a) nounwind {
502; RV32I-LABEL: grev4_i32:
503; RV32I:       # %bb.0:
504; RV32I-NEXT:    slli a1, a0, 4
505; RV32I-NEXT:    lui a2, 986895
506; RV32I-NEXT:    addi a2, a2, 240
507; RV32I-NEXT:    and a1, a1, a2
508; RV32I-NEXT:    srli a0, a0, 4
509; RV32I-NEXT:    lui a2, 61681
510; RV32I-NEXT:    addi a2, a2, -241
511; RV32I-NEXT:    and a0, a0, a2
512; RV32I-NEXT:    or a0, a1, a0
513; RV32I-NEXT:    ret
514;
515; RV32IB-LABEL: grev4_i32:
516; RV32IB:       # %bb.0:
517; RV32IB-NEXT:    rev4.b a0, a0
518; RV32IB-NEXT:    ret
519;
520; RV32IBP-LABEL: grev4_i32:
521; RV32IBP:       # %bb.0:
522; RV32IBP-NEXT:    rev4.b a0, a0
523; RV32IBP-NEXT:    ret
524  %and = shl i32 %a, 4
525  %shl = and i32 %and, -252645136
526  %and1 = lshr i32 %a, 4
527  %shr = and i32 %and1, 252645135
528  %or = or i32 %shl, %shr
529  ret i32 %or
530}
531
; grev4_i64: i64 form of the 4-bit swap pattern. Masks are
; 0xF0F0F0F0F0F0F0F0 for the left shift and 0x0F0F0F0F0F0F0F0F for the
; right shift. The assertions expect one rev4.b per 32-bit register (a0
; and a1) when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
532define i64 @grev4_i64(i64 %a) nounwind {
533; RV32I-LABEL: grev4_i64:
534; RV32I:       # %bb.0:
535; RV32I-NEXT:    slli a2, a0, 4
536; RV32I-NEXT:    slli a3, a1, 4
537; RV32I-NEXT:    lui a4, 986895
538; RV32I-NEXT:    addi a4, a4, 240
539; RV32I-NEXT:    and a3, a3, a4
540; RV32I-NEXT:    and a2, a2, a4
541; RV32I-NEXT:    srli a0, a0, 4
542; RV32I-NEXT:    srli a1, a1, 4
543; RV32I-NEXT:    lui a4, 61681
544; RV32I-NEXT:    addi a4, a4, -241
545; RV32I-NEXT:    and a1, a1, a4
546; RV32I-NEXT:    and a0, a0, a4
547; RV32I-NEXT:    or a0, a2, a0
548; RV32I-NEXT:    or a1, a3, a1
549; RV32I-NEXT:    ret
550;
551; RV32IB-LABEL: grev4_i64:
552; RV32IB:       # %bb.0:
553; RV32IB-NEXT:    rev4.b a0, a0
554; RV32IB-NEXT:    rev4.b a1, a1
555; RV32IB-NEXT:    ret
556;
557; RV32IBP-LABEL: grev4_i64:
558; RV32IBP:       # %bb.0:
559; RV32IBP-NEXT:    rev4.b a0, a0
560; RV32IBP-NEXT:    rev4.b a1, a1
561; RV32IBP-NEXT:    ret
562  %and = shl i64 %a, 4
563  %shl = and i64 %and, -1085102592571150096
564  %and1 = lshr i64 %a, 4
565  %shr = and i64 %and1, 1085102592571150095
566  %or = or i64 %shl, %shr
567  ret i64 %or
568}
569
; grev8_i32: ORs the 8-bit left shift of %a masked by 0xFF00FF00 with the
; 8-bit right shift masked by 0x00FF00FF. The assertions expect a single
; rev8.h when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
570define i32 @grev8_i32(i32 %a) nounwind {
571; RV32I-LABEL: grev8_i32:
572; RV32I:       # %bb.0:
573; RV32I-NEXT:    slli a1, a0, 8
574; RV32I-NEXT:    lui a2, 1044496
575; RV32I-NEXT:    addi a2, a2, -256
576; RV32I-NEXT:    and a1, a1, a2
577; RV32I-NEXT:    srli a0, a0, 8
578; RV32I-NEXT:    lui a2, 4080
579; RV32I-NEXT:    addi a2, a2, 255
580; RV32I-NEXT:    and a0, a0, a2
581; RV32I-NEXT:    or a0, a1, a0
582; RV32I-NEXT:    ret
583;
584; RV32IB-LABEL: grev8_i32:
585; RV32IB:       # %bb.0:
586; RV32IB-NEXT:    rev8.h a0, a0
587; RV32IB-NEXT:    ret
588;
589; RV32IBP-LABEL: grev8_i32:
590; RV32IBP:       # %bb.0:
591; RV32IBP-NEXT:    rev8.h a0, a0
592; RV32IBP-NEXT:    ret
593  %and = shl i32 %a, 8
594  %shl = and i32 %and, -16711936
595  %and1 = lshr i32 %a, 8
596  %shr = and i32 %and1, 16711935
597  %or = or i32 %shl, %shr
598  ret i32 %or
599}
600
; grev8_i64: i64 form of the 8-bit swap pattern. Masks are
; 0xFF00FF00FF00FF00 for the left shift and 0x00FF00FF00FF00FF for the
; right shift. The assertions expect one rev8.h per 32-bit register (a0
; and a1) when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
601define i64 @grev8_i64(i64 %a) nounwind {
602; RV32I-LABEL: grev8_i64:
603; RV32I:       # %bb.0:
604; RV32I-NEXT:    slli a2, a0, 8
605; RV32I-NEXT:    slli a3, a1, 8
606; RV32I-NEXT:    lui a4, 1044496
607; RV32I-NEXT:    addi a4, a4, -256
608; RV32I-NEXT:    and a3, a3, a4
609; RV32I-NEXT:    and a2, a2, a4
610; RV32I-NEXT:    srli a0, a0, 8
611; RV32I-NEXT:    srli a1, a1, 8
612; RV32I-NEXT:    lui a4, 4080
613; RV32I-NEXT:    addi a4, a4, 255
614; RV32I-NEXT:    and a1, a1, a4
615; RV32I-NEXT:    and a0, a0, a4
616; RV32I-NEXT:    or a0, a2, a0
617; RV32I-NEXT:    or a1, a3, a1
618; RV32I-NEXT:    ret
619;
620; RV32IB-LABEL: grev8_i64:
621; RV32IB:       # %bb.0:
622; RV32IB-NEXT:    rev8.h a0, a0
623; RV32IB-NEXT:    rev8.h a1, a1
624; RV32IB-NEXT:    ret
625;
626; RV32IBP-LABEL: grev8_i64:
627; RV32IBP:       # %bb.0:
628; RV32IBP-NEXT:    rev8.h a0, a0
629; RV32IBP-NEXT:    rev8.h a1, a1
630; RV32IBP-NEXT:    ret
631  %and = shl i64 %a, 8
632  %shl = and i64 %and, -71777214294589696
633  %and1 = lshr i64 %a, 8
634  %shr = and i64 %and1, 71777214294589695
635  %or = or i64 %shl, %shr
636  ret i64 %or
637}
638
; grev16_i32: ORs the 16-bit left shift of %a with its 16-bit right shift;
; the IR applies no masks. The assertions expect a single "rori a0, a0, 16"
; when +experimental-b or +experimental-zbp is enabled.
639define i32 @grev16_i32(i32 %a) nounwind {
640; RV32I-LABEL: grev16_i32:
641; RV32I:       # %bb.0:
642; RV32I-NEXT:    slli a1, a0, 16
643; RV32I-NEXT:    srli a0, a0, 16
644; RV32I-NEXT:    or a0, a1, a0
645; RV32I-NEXT:    ret
646;
647; RV32IB-LABEL: grev16_i32:
648; RV32IB:       # %bb.0:
649; RV32IB-NEXT:    rori a0, a0, 16
650; RV32IB-NEXT:    ret
651;
652; RV32IBP-LABEL: grev16_i32:
653; RV32IBP:       # %bb.0:
654; RV32IBP-NEXT:    rori a0, a0, 16
655; RV32IBP-NEXT:    ret
656  %shl = shl i32 %a, 16
657  %shr = lshr i32 %a, 16
658  %or = or i32 %shl, %shr
659  ret i32 %or
660}
661
; grev16_i64: i64 form of the 16-bit swap pattern. Masks are
; 0xFFFF0000FFFF0000 for the left shift and 0x0000FFFF0000FFFF for the
; right shift. The assertions expect one "rori ..., 16" per 32-bit register
; (a0 and a1) when +experimental-b or +experimental-zbp is enabled.
; Naming caveat: %and holds the shl result and %shl holds the masked value.
662define i64 @grev16_i64(i64 %a) nounwind {
663; RV32I-LABEL: grev16_i64:
664; RV32I:       # %bb.0:
665; RV32I-NEXT:    slli a2, a1, 16
666; RV32I-NEXT:    srli a3, a0, 16
667; RV32I-NEXT:    slli a0, a0, 16
668; RV32I-NEXT:    or a0, a0, a3
669; RV32I-NEXT:    srli a1, a1, 16
670; RV32I-NEXT:    or a1, a2, a1
671; RV32I-NEXT:    ret
672;
673; RV32IB-LABEL: grev16_i64:
674; RV32IB:       # %bb.0:
675; RV32IB-NEXT:    rori a0, a0, 16
676; RV32IB-NEXT:    rori a1, a1, 16
677; RV32IB-NEXT:    ret
678;
679; RV32IBP-LABEL: grev16_i64:
680; RV32IBP:       # %bb.0:
681; RV32IBP-NEXT:    rori a0, a0, 16
682; RV32IBP-NEXT:    rori a1, a1, 16
683; RV32IBP-NEXT:    ret
684  %and = shl i64 %a, 16
685  %shl = and i64 %and, -281470681808896
686  %and1 = lshr i64 %a, 16
687  %shr = and i64 %and1, 281470681808895
688  %or = or i64 %shl, %shr
689  ret i64 %or
690}
691
692declare i32 @llvm.bswap.i32(i32)
693
; bswap_i32: returns the result of a tail call to @llvm.bswap.i32 on %a.
; The assertions expect a shift/mask/or expansion on the base ISA and a
; single rev8 when +experimental-b or +experimental-zbp is enabled.
694define i32 @bswap_i32(i32 %a) nounwind {
695; RV32I-LABEL: bswap_i32:
696; RV32I:       # %bb.0:
697; RV32I-NEXT:    srli a1, a0, 8
698; RV32I-NEXT:    lui a2, 16
699; RV32I-NEXT:    addi a2, a2, -256
700; RV32I-NEXT:    and a1, a1, a2
701; RV32I-NEXT:    srli a2, a0, 24
702; RV32I-NEXT:    or a1, a1, a2
703; RV32I-NEXT:    slli a2, a0, 8
704; RV32I-NEXT:    lui a3, 4080
705; RV32I-NEXT:    and a2, a2, a3
706; RV32I-NEXT:    slli a0, a0, 24
707; RV32I-NEXT:    or a0, a0, a2
708; RV32I-NEXT:    or a0, a0, a1
709; RV32I-NEXT:    ret
710;
711; RV32IB-LABEL: bswap_i32:
712; RV32IB:       # %bb.0:
713; RV32IB-NEXT:    rev8 a0, a0
714; RV32IB-NEXT:    ret
715;
716; RV32IBP-LABEL: bswap_i32:
717; RV32IBP:       # %bb.0:
718; RV32IBP-NEXT:    rev8 a0, a0
719; RV32IBP-NEXT:    ret
720  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
721  ret i32 %1
722}
723
724declare i64 @llvm.bswap.i64(i64)
725
; bswap_i64: returns the result of calling @llvm.bswap.i64 on %a.
; With +experimental-b or +experimental-zbp the assertions expect rev8 on
; each 32-bit register, with the two results exchanged (a2 is used as a
; temporary and moved into a0).
; The function is marked nounwind to match every other test in this file;
; it sets up no stack frame, so the expected assembly should be unchanged.
726define i64 @bswap_i64(i64 %a) nounwind {
727; RV32I-LABEL: bswap_i64:
728; RV32I:       # %bb.0:
729; RV32I-NEXT:    srli a2, a1, 8
730; RV32I-NEXT:    lui a3, 16
731; RV32I-NEXT:    addi a3, a3, -256
732; RV32I-NEXT:    and a2, a2, a3
733; RV32I-NEXT:    srli a4, a1, 24
734; RV32I-NEXT:    or a2, a2, a4
735; RV32I-NEXT:    slli a4, a1, 8
736; RV32I-NEXT:    lui a5, 4080
737; RV32I-NEXT:    and a4, a4, a5
738; RV32I-NEXT:    slli a1, a1, 24
739; RV32I-NEXT:    or a1, a1, a4
740; RV32I-NEXT:    or a2, a1, a2
741; RV32I-NEXT:    srli a1, a0, 8
742; RV32I-NEXT:    and a1, a1, a3
743; RV32I-NEXT:    srli a3, a0, 24
744; RV32I-NEXT:    or a1, a1, a3
745; RV32I-NEXT:    slli a3, a0, 8
746; RV32I-NEXT:    and a3, a3, a5
747; RV32I-NEXT:    slli a0, a0, 24
748; RV32I-NEXT:    or a0, a0, a3
749; RV32I-NEXT:    or a1, a0, a1
750; RV32I-NEXT:    mv a0, a2
751; RV32I-NEXT:    ret
752;
753; RV32IB-LABEL: bswap_i64:
754; RV32IB:       # %bb.0:
755; RV32IB-NEXT:    rev8 a2, a1
756; RV32IB-NEXT:    rev8 a1, a0
757; RV32IB-NEXT:    mv a0, a2
758; RV32IB-NEXT:    ret
759;
760; RV32IBP-LABEL: bswap_i64:
761; RV32IBP:       # %bb.0:
762; RV32IBP-NEXT:    rev8 a2, a1
763; RV32IBP-NEXT:    rev8 a1, a0
764; RV32IBP-NEXT:    mv a0, a2
765; RV32IBP-NEXT:    ret
766  %1 = call i64 @llvm.bswap.i64(i64 %a)
767  ret i64 %1
768}
769
770declare i32 @llvm.bitreverse.i32(i32)
771
; bitreverse_i32: returns the result of a tail call to
; @llvm.bitreverse.i32 on %a. The assertions expect a long shift/mask/or
; expansion on the base ISA and a single rev when +experimental-b or
; +experimental-zbp is enabled.
772define i32 @bitreverse_i32(i32 %a) nounwind {
773; RV32I-LABEL: bitreverse_i32:
774; RV32I:       # %bb.0:
775; RV32I-NEXT:    srli a1, a0, 8
776; RV32I-NEXT:    lui a2, 16
777; RV32I-NEXT:    addi a2, a2, -256
778; RV32I-NEXT:    and a1, a1, a2
779; RV32I-NEXT:    srli a2, a0, 24
780; RV32I-NEXT:    or a1, a1, a2
781; RV32I-NEXT:    slli a2, a0, 8
782; RV32I-NEXT:    lui a3, 4080
783; RV32I-NEXT:    and a2, a2, a3
784; RV32I-NEXT:    slli a0, a0, 24
785; RV32I-NEXT:    or a0, a0, a2
786; RV32I-NEXT:    or a0, a0, a1
787; RV32I-NEXT:    lui a1, 61681
788; RV32I-NEXT:    addi a1, a1, -241
789; RV32I-NEXT:    and a1, a0, a1
790; RV32I-NEXT:    slli a1, a1, 4
791; RV32I-NEXT:    lui a2, 986895
792; RV32I-NEXT:    addi a2, a2, 240
793; RV32I-NEXT:    and a0, a0, a2
794; RV32I-NEXT:    srli a0, a0, 4
795; RV32I-NEXT:    or a0, a0, a1
796; RV32I-NEXT:    lui a1, 209715
797; RV32I-NEXT:    addi a1, a1, 819
798; RV32I-NEXT:    and a1, a0, a1
799; RV32I-NEXT:    slli a1, a1, 2
800; RV32I-NEXT:    lui a2, 838861
801; RV32I-NEXT:    addi a2, a2, -820
802; RV32I-NEXT:    and a0, a0, a2
803; RV32I-NEXT:    srli a0, a0, 2
804; RV32I-NEXT:    or a0, a0, a1
805; RV32I-NEXT:    lui a1, 349525
806; RV32I-NEXT:    addi a1, a1, 1365
807; RV32I-NEXT:    and a1, a0, a1
808; RV32I-NEXT:    slli a1, a1, 1
809; RV32I-NEXT:    lui a2, 699051
810; RV32I-NEXT:    addi a2, a2, -1366
811; RV32I-NEXT:    and a0, a0, a2
812; RV32I-NEXT:    srli a0, a0, 1
813; RV32I-NEXT:    or a0, a0, a1
814; RV32I-NEXT:    ret
815;
816; RV32IB-LABEL: bitreverse_i32:
817; RV32IB:       # %bb.0:
818; RV32IB-NEXT:    rev a0, a0
819; RV32IB-NEXT:    ret
820;
821; RV32IBP-LABEL: bitreverse_i32:
822; RV32IBP:       # %bb.0:
823; RV32IBP-NEXT:    rev a0, a0
824; RV32IBP-NEXT:    ret
825  %1 = tail call i32 @llvm.bitreverse.i32(i32 %a)
826  ret i32 %1
827}
828
829declare i64 @llvm.bitreverse.i64(i64)
830
; bitreverse_i64: returns the result of calling @llvm.bitreverse.i64 on %a.
; With +experimental-b or +experimental-zbp the assertions expect rev on
; each 32-bit register, with the two results exchanged (a2 is used as a
; temporary and moved into a0). The base-ISA expansion reuses the mask
; constants it materializes for the first half when processing the second.
831define i64 @bitreverse_i64(i64 %a) nounwind {
832; RV32I-LABEL: bitreverse_i64:
833; RV32I:       # %bb.0:
834; RV32I-NEXT:    srli a2, a1, 8
835; RV32I-NEXT:    lui a3, 16
836; RV32I-NEXT:    addi t0, a3, -256
837; RV32I-NEXT:    and a2, a2, t0
838; RV32I-NEXT:    srli a4, a1, 24
839; RV32I-NEXT:    or a2, a2, a4
840; RV32I-NEXT:    slli a4, a1, 8
841; RV32I-NEXT:    lui a6, 4080
842; RV32I-NEXT:    and a4, a4, a6
843; RV32I-NEXT:    slli a1, a1, 24
844; RV32I-NEXT:    or a1, a1, a4
845; RV32I-NEXT:    or a1, a1, a2
846; RV32I-NEXT:    lui a2, 61681
847; RV32I-NEXT:    addi t1, a2, -241
848; RV32I-NEXT:    and a2, a1, t1
849; RV32I-NEXT:    slli a2, a2, 4
850; RV32I-NEXT:    lui a5, 986895
851; RV32I-NEXT:    addi t2, a5, 240
852; RV32I-NEXT:    and a1, a1, t2
853; RV32I-NEXT:    srli a1, a1, 4
854; RV32I-NEXT:    or a1, a1, a2
855; RV32I-NEXT:    lui a2, 209715
856; RV32I-NEXT:    addi t3, a2, 819
857; RV32I-NEXT:    and a3, a1, t3
858; RV32I-NEXT:    slli a3, a3, 2
859; RV32I-NEXT:    lui a4, 838861
860; RV32I-NEXT:    addi a4, a4, -820
861; RV32I-NEXT:    and a1, a1, a4
862; RV32I-NEXT:    srli a1, a1, 2
863; RV32I-NEXT:    or a1, a1, a3
864; RV32I-NEXT:    lui a3, 349525
865; RV32I-NEXT:    addi a3, a3, 1365
866; RV32I-NEXT:    and a5, a1, a3
867; RV32I-NEXT:    slli a5, a5, 1
868; RV32I-NEXT:    lui a2, 699051
869; RV32I-NEXT:    addi a2, a2, -1366
870; RV32I-NEXT:    and a1, a1, a2
871; RV32I-NEXT:    srli a1, a1, 1
872; RV32I-NEXT:    or a7, a1, a5
873; RV32I-NEXT:    srli a1, a0, 8
874; RV32I-NEXT:    and a1, a1, t0
875; RV32I-NEXT:    srli a5, a0, 24
876; RV32I-NEXT:    or a1, a1, a5
877; RV32I-NEXT:    slli a5, a0, 8
878; RV32I-NEXT:    and a5, a5, a6
879; RV32I-NEXT:    slli a0, a0, 24
880; RV32I-NEXT:    or a0, a0, a5
881; RV32I-NEXT:    or a0, a0, a1
882; RV32I-NEXT:    and a1, a0, t1
883; RV32I-NEXT:    slli a1, a1, 4
884; RV32I-NEXT:    and a0, a0, t2
885; RV32I-NEXT:    srli a0, a0, 4
886; RV32I-NEXT:    or a0, a0, a1
887; RV32I-NEXT:    and a1, a0, t3
888; RV32I-NEXT:    slli a1, a1, 2
889; RV32I-NEXT:    and a0, a0, a4
890; RV32I-NEXT:    srli a0, a0, 2
891; RV32I-NEXT:    or a0, a0, a1
892; RV32I-NEXT:    and a1, a0, a3
893; RV32I-NEXT:    slli a1, a1, 1
894; RV32I-NEXT:    and a0, a0, a2
895; RV32I-NEXT:    srli a0, a0, 1
896; RV32I-NEXT:    or a1, a0, a1
897; RV32I-NEXT:    mv a0, a7
898; RV32I-NEXT:    ret
899;
900; RV32IB-LABEL: bitreverse_i64:
901; RV32IB:       # %bb.0:
902; RV32IB-NEXT:    rev a2, a1
903; RV32IB-NEXT:    rev a1, a0
904; RV32IB-NEXT:    mv a0, a2
905; RV32IB-NEXT:    ret
906;
907; RV32IBP-LABEL: bitreverse_i64:
908; RV32IBP:       # %bb.0:
909; RV32IBP-NEXT:    rev a2, a1
910; RV32IBP-NEXT:    rev a1, a0
911; RV32IBP-NEXT:    mv a0, a2
912; RV32IBP-NEXT:    ret
913  %1 = call i64 @llvm.bitreverse.i64(i64 %a)
914  ret i64 %1
915}
916
; shfl1_i32: keeps the bits of %a under 0x99999999, then ORs in
; (%a << 1) & 0x44444444 and (%a >> 1) & 0x22222222. The assertions expect
; a single zip.n when +experimental-b or +experimental-zbp is enabled.
; The %b parameter is not used by the body.
917define i32 @shfl1_i32(i32 %a, i32 %b) nounwind {
918; RV32I-LABEL: shfl1_i32:
919; RV32I:       # %bb.0:
920; RV32I-NEXT:    lui a1, 629146
921; RV32I-NEXT:    addi a1, a1, -1639
922; RV32I-NEXT:    and a1, a0, a1
923; RV32I-NEXT:    slli a2, a0, 1
924; RV32I-NEXT:    lui a3, 279620
925; RV32I-NEXT:    addi a3, a3, 1092
926; RV32I-NEXT:    and a2, a2, a3
927; RV32I-NEXT:    or a1, a2, a1
928; RV32I-NEXT:    srli a0, a0, 1
929; RV32I-NEXT:    lui a2, 139810
930; RV32I-NEXT:    addi a2, a2, 546
931; RV32I-NEXT:    and a0, a0, a2
932; RV32I-NEXT:    or a0, a1, a0
933; RV32I-NEXT:    ret
934;
935; RV32IB-LABEL: shfl1_i32:
936; RV32IB:       # %bb.0:
937; RV32IB-NEXT:    zip.n a0, a0
938; RV32IB-NEXT:    ret
939;
940; RV32IBP-LABEL: shfl1_i32:
941; RV32IBP:       # %bb.0:
942; RV32IBP-NEXT:    zip.n a0, a0
943; RV32IBP-NEXT:    ret
944  %and = and i32 %a, -1717986919
945  %shl = shl i32 %a, 1
946  %and1 = and i32 %shl, 1145324612
947  %or = or i32 %and1, %and
948  %shr = lshr i32 %a, 1
949  %and2 = and i32 %shr, 572662306
950  %or3 = or i32 %or, %and2
951  ret i32 %or3
952}
953
; shfl1_i64: i64 form of the 1-bit shuffle pattern. It keeps the bits of %a
; under 0x9999999999999999, then ORs in (%a << 1) & 0x4444444444444444 and
; (%a >> 1) & 0x2222222222222222. The assertions expect one zip.n per
; 32-bit register (a0 and a1) when +experimental-b or +experimental-zbp
; is enabled. The %b parameter is not used by the body.
954define i64 @shfl1_i64(i64 %a, i64 %b) nounwind {
955; RV32I-LABEL: shfl1_i64:
956; RV32I:       # %bb.0:
957; RV32I-NEXT:    lui a2, 629146
958; RV32I-NEXT:    addi a2, a2, -1639
959; RV32I-NEXT:    and a6, a0, a2
960; RV32I-NEXT:    and a2, a1, a2
961; RV32I-NEXT:    slli a4, a1, 1
962; RV32I-NEXT:    slli a5, a0, 1
963; RV32I-NEXT:    lui a3, 279620
964; RV32I-NEXT:    addi a3, a3, 1092
965; RV32I-NEXT:    and a5, a5, a3
966; RV32I-NEXT:    and a3, a4, a3
967; RV32I-NEXT:    or a2, a3, a2
968; RV32I-NEXT:    or a3, a5, a6
969; RV32I-NEXT:    srli a0, a0, 1
970; RV32I-NEXT:    srli a1, a1, 1
971; RV32I-NEXT:    lui a4, 139810
972; RV32I-NEXT:    addi a4, a4, 546
973; RV32I-NEXT:    and a1, a1, a4
974; RV32I-NEXT:    and a0, a0, a4
975; RV32I-NEXT:    or a0, a3, a0
976; RV32I-NEXT:    or a1, a2, a1
977; RV32I-NEXT:    ret
978;
979; RV32IB-LABEL: shfl1_i64:
980; RV32IB:       # %bb.0:
981; RV32IB-NEXT:    zip.n a0, a0
982; RV32IB-NEXT:    zip.n a1, a1
983; RV32IB-NEXT:    ret
984;
985; RV32IBP-LABEL: shfl1_i64:
986; RV32IBP:       # %bb.0:
987; RV32IBP-NEXT:    zip.n a0, a0
988; RV32IBP-NEXT:    zip.n a1, a1
989; RV32IBP-NEXT:    ret
990  %and = and i64 %a, -7378697629483820647
991  %shl = shl i64 %a, 1
992  %and1 = and i64 %shl, 4919131752989213764
993  %or = or i64 %and1, %and
994  %shr = lshr i64 %a, 1
995  %and2 = and i64 %shr, 2459565876494606882
996  %or3 = or i64 %or, %and2
997  ret i64 %or3
998}
999
; shfl2_i32: keeps the bits of %a under 0xC3C3C3C3, then ORs in
; (%a << 2) & 0x30303030 and (%a >> 2) & 0x0C0C0C0C. The assertions expect
; a single zip2.b when +experimental-b or +experimental-zbp is enabled.
; The %b parameter is not used by the body.
1000define i32 @shfl2_i32(i32 %a, i32 %b) nounwind {
1001; RV32I-LABEL: shfl2_i32:
1002; RV32I:       # %bb.0:
1003; RV32I-NEXT:    lui a1, 801852
1004; RV32I-NEXT:    addi a1, a1, 963
1005; RV32I-NEXT:    and a1, a0, a1
1006; RV32I-NEXT:    slli a2, a0, 2
1007; RV32I-NEXT:    lui a3, 197379
1008; RV32I-NEXT:    addi a3, a3, 48
1009; RV32I-NEXT:    and a2, a2, a3
1010; RV32I-NEXT:    or a1, a2, a1
1011; RV32I-NEXT:    srli a0, a0, 2
1012; RV32I-NEXT:    lui a2, 49345
1013; RV32I-NEXT:    addi a2, a2, -1012
1014; RV32I-NEXT:    and a0, a0, a2
1015; RV32I-NEXT:    or a0, a1, a0
1016; RV32I-NEXT:    ret
1017;
1018; RV32IB-LABEL: shfl2_i32:
1019; RV32IB:       # %bb.0:
1020; RV32IB-NEXT:    zip2.b a0, a0
1021; RV32IB-NEXT:    ret
1022;
1023; RV32IBP-LABEL: shfl2_i32:
1024; RV32IBP:       # %bb.0:
1025; RV32IBP-NEXT:    zip2.b a0, a0
1026; RV32IBP-NEXT:    ret
1027  %and = and i32 %a, -1010580541
1028  %shl = shl i32 %a, 2
1029  %and1 = and i32 %shl, 808464432
1030  %or = or i32 %and1, %and
1031  %shr = lshr i32 %a, 2
1032  %and2 = and i32 %shr, 202116108
1033  %or3 = or i32 %or, %and2
1034  ret i32 %or3
1035}
1036
; shfl2_i64: i64 form of the 2-bit shuffle pattern. It keeps the bits of %a
; under 0xC3C3C3C3C3C3C3C3, then ORs in (%a << 2) & 0x3030303030303030 and
; (%a >> 2) & 0x0C0C0C0C0C0C0C0C. The assertions expect one zip2.b per
; 32-bit register (a0 and a1) when +experimental-b or +experimental-zbp
; is enabled. The %b parameter is not used by the body.
1037define i64 @shfl2_i64(i64 %a, i64 %b) nounwind {
1038; RV32I-LABEL: shfl2_i64:
1039; RV32I:       # %bb.0:
1040; RV32I-NEXT:    lui a2, 801852
1041; RV32I-NEXT:    addi a2, a2, 963
1042; RV32I-NEXT:    and a6, a0, a2
1043; RV32I-NEXT:    and a2, a1, a2
1044; RV32I-NEXT:    slli a4, a1, 2
1045; RV32I-NEXT:    slli a5, a0, 2
1046; RV32I-NEXT:    lui a3, 197379
1047; RV32I-NEXT:    addi a3, a3, 48
1048; RV32I-NEXT:    and a5, a5, a3
1049; RV32I-NEXT:    and a3, a4, a3
1050; RV32I-NEXT:    or a2, a3, a2
1051; RV32I-NEXT:    or a3, a5, a6
1052; RV32I-NEXT:    srli a0, a0, 2
1053; RV32I-NEXT:    srli a1, a1, 2
1054; RV32I-NEXT:    lui a4, 49345
1055; RV32I-NEXT:    addi a4, a4, -1012
1056; RV32I-NEXT:    and a1, a1, a4
1057; RV32I-NEXT:    and a0, a0, a4
1058; RV32I-NEXT:    or a0, a3, a0
1059; RV32I-NEXT:    or a1, a2, a1
1060; RV32I-NEXT:    ret
1061;
1062; RV32IB-LABEL: shfl2_i64:
1063; RV32IB:       # %bb.0:
1064; RV32IB-NEXT:    zip2.b a0, a0
1065; RV32IB-NEXT:    zip2.b a1, a1
1066; RV32IB-NEXT:    ret
1067;
1068; RV32IBP-LABEL: shfl2_i64:
1069; RV32IBP:       # %bb.0:
1070; RV32IBP-NEXT:    zip2.b a0, a0
1071; RV32IBP-NEXT:    zip2.b a1, a1
1072; RV32IBP-NEXT:    ret
1073  %and = and i64 %a, -4340410370284600381
1074  %shl = shl i64 %a, 2
1075  %and1 = and i64 %shl, 3472328296227680304
1076  %or = or i64 %and1, %and
1077  %shr = lshr i64 %a, 2
1078  %and2 = and i64 %shr, 868082074056920076
1079  %or3 = or i64 %or, %and2
1080  ret i64 %or3
1081}
1082
; 32-bit shuffle pattern with a shift distance of 4.
; Bits of %a under 0xF00FF00F stay where they are, bits under 0x00F000F0
; move up by four, and bits under 0x0F000F00 move down by four. With B or
; Zbp enabled the whole expression is expected to become one zip4.h.
; The second argument %b is not referenced by the body.
define i32 @shfl4_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: shfl4_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lui a1, 983295
; RV32I-NEXT:    addi a1, a1, 15
; RV32I-NEXT:    and a1, a0, a1
; RV32I-NEXT:    slli a2, a0, 4
; RV32I-NEXT:    lui a3, 61441
; RV32I-NEXT:    addi a3, a3, -256
; RV32I-NEXT:    and a2, a2, a3
; RV32I-NEXT:    or a1, a2, a1
; RV32I-NEXT:    srli a0, a0, 4
; RV32I-NEXT:    lui a2, 3840
; RV32I-NEXT:    addi a2, a2, 240
; RV32I-NEXT:    and a0, a0, a2
; RV32I-NEXT:    or a0, a1, a0
; RV32I-NEXT:    ret
;
; RV32IB-LABEL: shfl4_i32:
; RV32IB:       # %bb.0:
; RV32IB-NEXT:    zip4.h a0, a0
; RV32IB-NEXT:    ret
;
; RV32IBP-LABEL: shfl4_i32:
; RV32IBP:       # %bb.0:
; RV32IBP-NEXT:    zip4.h a0, a0
; RV32IBP-NEXT:    ret
  %stay = and i32 %a, -267390961          ; 0xF00FF00F
  %left = shl i32 %a, 4
  %left.sel = and i32 %left, 251662080    ; 0x0F000F00
  %acc = or i32 %left.sel, %stay
  %right = lshr i32 %a, 4
  %right.sel = and i32 %right, 15728880   ; 0x00F000F0
  %res = or i32 %acc, %right.sel
  ret i32 %res
}
1119
; 64-bit shuffle pattern with a shift distance of 4.
; Bits of %a under 0xF00FF00FF00FF00F stay where they are, bits under
; 0x00F000F000F000F0 move up by four, and bits under 0x0F000F000F000F00
; move down by four. On this 32-bit target the value is split, so one
; zip4.h per register half is expected when B or Zbp is enabled.
; The second argument %b is not referenced by the body.
define i64 @shfl4_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: shfl4_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lui a2, 983295
; RV32I-NEXT:    addi a2, a2, 15
; RV32I-NEXT:    and a6, a0, a2
; RV32I-NEXT:    and a2, a1, a2
; RV32I-NEXT:    slli a4, a1, 4
; RV32I-NEXT:    slli a5, a0, 4
; RV32I-NEXT:    lui a3, 61441
; RV32I-NEXT:    addi a3, a3, -256
; RV32I-NEXT:    and a5, a5, a3
; RV32I-NEXT:    and a3, a4, a3
; RV32I-NEXT:    or a2, a3, a2
; RV32I-NEXT:    or a3, a5, a6
; RV32I-NEXT:    srli a0, a0, 4
; RV32I-NEXT:    srli a1, a1, 4
; RV32I-NEXT:    lui a4, 3840
; RV32I-NEXT:    addi a4, a4, 240
; RV32I-NEXT:    and a1, a1, a4
; RV32I-NEXT:    and a0, a0, a4
; RV32I-NEXT:    or a0, a3, a0
; RV32I-NEXT:    or a1, a2, a1
; RV32I-NEXT:    ret
;
; RV32IB-LABEL: shfl4_i64:
; RV32IB:       # %bb.0:
; RV32IB-NEXT:    zip4.h a0, a0
; RV32IB-NEXT:    zip4.h a1, a1
; RV32IB-NEXT:    ret
;
; RV32IBP-LABEL: shfl4_i64:
; RV32IBP:       # %bb.0:
; RV32IBP-NEXT:    zip4.h a0, a0
; RV32IBP-NEXT:    zip4.h a1, a1
; RV32IBP-NEXT:    ret
  %stay = and i64 %a, -1148435428713435121          ; 0xF00FF00FF00FF00F
  %left = shl i64 %a, 4
  %left.sel = and i64 %left, 1080880403494997760    ; 0x0F000F000F000F00
  %acc = or i64 %left.sel, %stay
  %right = lshr i64 %a, 4
  %right.sel = and i64 %right, 67555025218437360    ; 0x00F000F000F000F0
  %res = or i64 %acc, %right.sel
  ret i64 %res
}
1165
; 32-bit shuffle pattern with a shift distance of 8.
; Bits of %a under 0xFF0000FF stay where they are, bits under 0x0000FF00
; move up by eight, and bits under 0x00FF0000 move down by eight (the two
; middle bytes trade places). With B or Zbp enabled the whole expression
; is expected to become one zip8.
; The second argument %b is not referenced by the body.
define i32 @shfl8_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: shfl8_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lui a1, 1044480
; RV32I-NEXT:    addi a1, a1, 255
; RV32I-NEXT:    and a1, a0, a1
; RV32I-NEXT:    slli a2, a0, 8
; RV32I-NEXT:    lui a3, 4080
; RV32I-NEXT:    and a2, a2, a3
; RV32I-NEXT:    or a1, a2, a1
; RV32I-NEXT:    srli a0, a0, 8
; RV32I-NEXT:    lui a2, 16
; RV32I-NEXT:    addi a2, a2, -256
; RV32I-NEXT:    and a0, a0, a2
; RV32I-NEXT:    or a0, a1, a0
; RV32I-NEXT:    ret
;
; RV32IB-LABEL: shfl8_i32:
; RV32IB:       # %bb.0:
; RV32IB-NEXT:    zip8 a0, a0
; RV32IB-NEXT:    ret
;
; RV32IBP-LABEL: shfl8_i32:
; RV32IBP:       # %bb.0:
; RV32IBP-NEXT:    zip8 a0, a0
; RV32IBP-NEXT:    ret
  %stay = and i32 %a, -16776961         ; 0xFF0000FF
  %left = shl i32 %a, 8
  %left.sel = and i32 %left, 16711680   ; 0x00FF0000
  %acc = or i32 %left.sel, %stay
  %right = lshr i32 %a, 8
  %right.sel = and i32 %right, 65280    ; 0x0000FF00
  %res = or i32 %acc, %right.sel
  ret i32 %res
}
1201
; 64-bit shuffle pattern with a shift distance of 8.
; Bits of %a under 0xFF0000FFFF0000FF stay where they are, bits under
; 0x0000FF000000FF00 move up by eight, and bits under 0x00FF000000FF0000
; move down by eight. On this 32-bit target the value is split, so one
; zip8 per register half is expected when B or Zbp is enabled.
; The second argument %b is not referenced by the body.
define i64 @shfl8_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: shfl8_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lui a2, 1044480
; RV32I-NEXT:    addi a2, a2, 255
; RV32I-NEXT:    and a3, a0, a2
; RV32I-NEXT:    and a2, a1, a2
; RV32I-NEXT:    slli a4, a1, 8
; RV32I-NEXT:    slli a5, a0, 8
; RV32I-NEXT:    lui a6, 4080
; RV32I-NEXT:    and a5, a5, a6
; RV32I-NEXT:    and a4, a4, a6
; RV32I-NEXT:    or a2, a4, a2
; RV32I-NEXT:    or a3, a5, a3
; RV32I-NEXT:    srli a0, a0, 8
; RV32I-NEXT:    srli a1, a1, 8
; RV32I-NEXT:    lui a4, 16
; RV32I-NEXT:    addi a4, a4, -256
; RV32I-NEXT:    and a1, a1, a4
; RV32I-NEXT:    and a0, a0, a4
; RV32I-NEXT:    or a0, a3, a0
; RV32I-NEXT:    or a1, a2, a1
; RV32I-NEXT:    ret
;
; RV32IB-LABEL: shfl8_i64:
; RV32IB:       # %bb.0:
; RV32IB-NEXT:    zip8 a0, a0
; RV32IB-NEXT:    zip8 a1, a1
; RV32IB-NEXT:    ret
;
; RV32IBP-LABEL: shfl8_i64:
; RV32IBP:       # %bb.0:
; RV32IBP-NEXT:    zip8 a0, a0
; RV32IBP-NEXT:    zip8 a1, a1
; RV32IBP-NEXT:    ret
  %stay = and i64 %a, -72056494543077121          ; 0xFF0000FFFF0000FF
  %left = shl i64 %a, 8
  %left.sel = and i64 %left, 71776119077928960    ; 0x00FF000000FF0000
  %acc = or i64 %left.sel, %stay
  %right = lshr i64 %a, 8
  %right.sel = and i64 %right, 280375465148160    ; 0x0000FF000000FF00
  %res = or i64 %acc, %right.sel
  ret i64 %res
}
1246