1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
3; RUN:		-mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9LE
4; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
5; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9BE
6; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
7; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8LE
8; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
9; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8BE
10
; fold_urem_vec_1: unsigned remainder of each <4 x i16> lane by a different
; non-power-of-two constant (95, 124, 98, 1003).
; The expected output for all four run configurations contains no divide
; instruction: each lane is moved to a GPR, multiplied high (mulhwu) by a
; per-divisor constant, shifted, multiplied back by the divisor (mulli) and
; subtracted from the lane value. The four remainders are then merged back
; into v2 with vmrghh plus vmrglw (little-endian) or vmrghw (big-endian).
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
11define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
12; P9LE-LABEL: fold_urem_vec_1:
13; P9LE:       # %bb.0:
14; P9LE-NEXT:    li r3, 4
15; P9LE-NEXT:    vextuhrx r3, r3, v2
16; P9LE-NEXT:    lis r4, 21399
17; P9LE-NEXT:    ori r4, r4, 33437
18; P9LE-NEXT:    clrlwi r3, r3, 16
19; P9LE-NEXT:    mulhwu r4, r3, r4
20; P9LE-NEXT:    srwi r4, r4, 5
21; P9LE-NEXT:    mulli r4, r4, 98
22; P9LE-NEXT:    sub r3, r3, r4
23; P9LE-NEXT:    lis r4, 16727
24; P9LE-NEXT:    mtvsrd v3, r3
25; P9LE-NEXT:    li r3, 6
26; P9LE-NEXT:    vextuhrx r3, r3, v2
27; P9LE-NEXT:    clrlwi r3, r3, 16
28; P9LE-NEXT:    ori r4, r4, 2287
29; P9LE-NEXT:    mulhwu r4, r3, r4
30; P9LE-NEXT:    srwi r4, r4, 8
31; P9LE-NEXT:    mulli r4, r4, 1003
32; P9LE-NEXT:    sub r3, r3, r4
33; P9LE-NEXT:    mtvsrd v4, r3
34; P9LE-NEXT:    li r3, 2
35; P9LE-NEXT:    vextuhrx r3, r3, v2
36; P9LE-NEXT:    lis r5, 8456
37; P9LE-NEXT:    ori r5, r5, 16913
38; P9LE-NEXT:    vmrghh v3, v4, v3
39; P9LE-NEXT:    clrlwi r4, r3, 16
40; P9LE-NEXT:    rlwinm r3, r3, 30, 18, 31
41; P9LE-NEXT:    mulhwu r3, r3, r5
42; P9LE-NEXT:    srwi r3, r3, 2
43; P9LE-NEXT:    mulli r3, r3, 124
44; P9LE-NEXT:    sub r3, r4, r3
45; P9LE-NEXT:    lis r4, 22765
46; P9LE-NEXT:    mtvsrd v4, r3
47; P9LE-NEXT:    li r3, 0
48; P9LE-NEXT:    vextuhrx r3, r3, v2
49; P9LE-NEXT:    clrlwi r3, r3, 16
50; P9LE-NEXT:    ori r4, r4, 8969
51; P9LE-NEXT:    mulhwu r4, r3, r4
52; P9LE-NEXT:    sub r5, r3, r4
53; P9LE-NEXT:    srwi r5, r5, 1
54; P9LE-NEXT:    add r4, r5, r4
55; P9LE-NEXT:    srwi r4, r4, 6
56; P9LE-NEXT:    mulli r4, r4, 95
57; P9LE-NEXT:    sub r3, r3, r4
58; P9LE-NEXT:    mtvsrd v2, r3
59; P9LE-NEXT:    vmrghh v2, v4, v2
60; P9LE-NEXT:    vmrglw v2, v3, v2
61; P9LE-NEXT:    blr
62;
63; P9BE-LABEL: fold_urem_vec_1:
64; P9BE:       # %bb.0:
65; P9BE-NEXT:    li r3, 6
66; P9BE-NEXT:    vextuhlx r3, r3, v2
67; P9BE-NEXT:    lis r4, 16727
68; P9BE-NEXT:    ori r4, r4, 2287
69; P9BE-NEXT:    clrlwi r3, r3, 16
70; P9BE-NEXT:    mulhwu r4, r3, r4
71; P9BE-NEXT:    srwi r4, r4, 8
72; P9BE-NEXT:    mulli r4, r4, 1003
73; P9BE-NEXT:    sub r3, r3, r4
74; P9BE-NEXT:    lis r4, 21399
75; P9BE-NEXT:    sldi r3, r3, 48
76; P9BE-NEXT:    mtvsrd v3, r3
77; P9BE-NEXT:    li r3, 4
78; P9BE-NEXT:    vextuhlx r3, r3, v2
79; P9BE-NEXT:    clrlwi r3, r3, 16
80; P9BE-NEXT:    ori r4, r4, 33437
81; P9BE-NEXT:    mulhwu r4, r3, r4
82; P9BE-NEXT:    srwi r4, r4, 5
83; P9BE-NEXT:    mulli r4, r4, 98
84; P9BE-NEXT:    sub r3, r3, r4
85; P9BE-NEXT:    sldi r3, r3, 48
86; P9BE-NEXT:    mtvsrd v4, r3
87; P9BE-NEXT:    li r3, 2
88; P9BE-NEXT:    vextuhlx r3, r3, v2
89; P9BE-NEXT:    lis r5, 8456
90; P9BE-NEXT:    ori r5, r5, 16913
91; P9BE-NEXT:    vmrghh v3, v4, v3
92; P9BE-NEXT:    clrlwi r4, r3, 16
93; P9BE-NEXT:    rlwinm r3, r3, 30, 18, 31
94; P9BE-NEXT:    mulhwu r3, r3, r5
95; P9BE-NEXT:    srwi r3, r3, 2
96; P9BE-NEXT:    mulli r3, r3, 124
97; P9BE-NEXT:    sub r3, r4, r3
98; P9BE-NEXT:    lis r4, 22765
99; P9BE-NEXT:    sldi r3, r3, 48
100; P9BE-NEXT:    mtvsrd v4, r3
101; P9BE-NEXT:    li r3, 0
102; P9BE-NEXT:    vextuhlx r3, r3, v2
103; P9BE-NEXT:    clrlwi r3, r3, 16
104; P9BE-NEXT:    ori r4, r4, 8969
105; P9BE-NEXT:    mulhwu r4, r3, r4
106; P9BE-NEXT:    sub r5, r3, r4
107; P9BE-NEXT:    srwi r5, r5, 1
108; P9BE-NEXT:    add r4, r5, r4
109; P9BE-NEXT:    srwi r4, r4, 6
110; P9BE-NEXT:    mulli r4, r4, 95
111; P9BE-NEXT:    sub r3, r3, r4
112; P9BE-NEXT:    sldi r3, r3, 48
113; P9BE-NEXT:    mtvsrd v2, r3
114; P9BE-NEXT:    vmrghh v2, v2, v4
115; P9BE-NEXT:    vmrghw v2, v2, v3
116; P9BE-NEXT:    blr
117;
118; P8LE-LABEL: fold_urem_vec_1:
119; P8LE:       # %bb.0:
120; P8LE-NEXT:    xxswapd vs0, v2
121; P8LE-NEXT:    lis r3, 22765
122; P8LE-NEXT:    lis r7, 21399
123; P8LE-NEXT:    lis r9, 16727
124; P8LE-NEXT:    lis r10, 8456
125; P8LE-NEXT:    ori r3, r3, 8969
126; P8LE-NEXT:    ori r7, r7, 33437
127; P8LE-NEXT:    ori r9, r9, 2287
128; P8LE-NEXT:    ori r10, r10, 16913
129; P8LE-NEXT:    mffprd r4, f0
130; P8LE-NEXT:    clrldi r6, r4, 48
131; P8LE-NEXT:    rldicl r5, r4, 32, 48
132; P8LE-NEXT:    clrlwi r6, r6, 16
133; P8LE-NEXT:    rldicl r8, r4, 16, 48
134; P8LE-NEXT:    clrlwi r5, r5, 16
135; P8LE-NEXT:    mulhwu r3, r6, r3
136; P8LE-NEXT:    rldicl r4, r4, 48, 48
137; P8LE-NEXT:    clrlwi r8, r8, 16
138; P8LE-NEXT:    rlwinm r11, r4, 30, 18, 31
139; P8LE-NEXT:    mulhwu r7, r5, r7
140; P8LE-NEXT:    clrlwi r4, r4, 16
141; P8LE-NEXT:    mulhwu r9, r8, r9
142; P8LE-NEXT:    mulhwu r10, r11, r10
143; P8LE-NEXT:    sub r11, r6, r3
144; P8LE-NEXT:    srwi r11, r11, 1
145; P8LE-NEXT:    srwi r7, r7, 5
146; P8LE-NEXT:    add r3, r11, r3
147; P8LE-NEXT:    srwi r9, r9, 8
148; P8LE-NEXT:    srwi r10, r10, 2
149; P8LE-NEXT:    srwi r3, r3, 6
150; P8LE-NEXT:    mulli r7, r7, 98
151; P8LE-NEXT:    mulli r9, r9, 1003
152; P8LE-NEXT:    mulli r3, r3, 95
153; P8LE-NEXT:    mulli r10, r10, 124
154; P8LE-NEXT:    sub r5, r5, r7
155; P8LE-NEXT:    sub r7, r8, r9
156; P8LE-NEXT:    sub r3, r6, r3
157; P8LE-NEXT:    mtvsrd v2, r5
158; P8LE-NEXT:    sub r4, r4, r10
159; P8LE-NEXT:    mtvsrd v3, r7
160; P8LE-NEXT:    mtvsrd v4, r3
161; P8LE-NEXT:    mtvsrd v5, r4
162; P8LE-NEXT:    vmrghh v2, v3, v2
163; P8LE-NEXT:    vmrghh v3, v5, v4
164; P8LE-NEXT:    vmrglw v2, v2, v3
165; P8LE-NEXT:    blr
166;
167; P8BE-LABEL: fold_urem_vec_1:
168; P8BE:       # %bb.0:
169; P8BE-NEXT:    mfvsrd r4, v2
170; P8BE-NEXT:    lis r3, 22765
171; P8BE-NEXT:    lis r7, 16727
172; P8BE-NEXT:    lis r9, 21399
173; P8BE-NEXT:    lis r10, 8456
174; P8BE-NEXT:    ori r3, r3, 8969
175; P8BE-NEXT:    ori r7, r7, 2287
176; P8BE-NEXT:    ori r9, r9, 33437
177; P8BE-NEXT:    ori r10, r10, 16913
178; P8BE-NEXT:    rldicl r6, r4, 16, 48
179; P8BE-NEXT:    clrldi r5, r4, 48
180; P8BE-NEXT:    clrlwi r6, r6, 16
181; P8BE-NEXT:    rldicl r8, r4, 48, 48
182; P8BE-NEXT:    clrlwi r5, r5, 16
183; P8BE-NEXT:    mulhwu r3, r6, r3
184; P8BE-NEXT:    rldicl r4, r4, 32, 48
185; P8BE-NEXT:    clrlwi r8, r8, 16
186; P8BE-NEXT:    mulhwu r7, r5, r7
187; P8BE-NEXT:    rlwinm r11, r4, 30, 18, 31
188; P8BE-NEXT:    clrlwi r4, r4, 16
189; P8BE-NEXT:    mulhwu r9, r8, r9
190; P8BE-NEXT:    mulhwu r10, r11, r10
191; P8BE-NEXT:    sub r11, r6, r3
192; P8BE-NEXT:    srwi r11, r11, 1
193; P8BE-NEXT:    srwi r7, r7, 8
194; P8BE-NEXT:    add r3, r11, r3
195; P8BE-NEXT:    srwi r9, r9, 5
196; P8BE-NEXT:    srwi r10, r10, 2
197; P8BE-NEXT:    mulli r7, r7, 1003
198; P8BE-NEXT:    srwi r3, r3, 6
199; P8BE-NEXT:    mulli r9, r9, 98
200; P8BE-NEXT:    mulli r3, r3, 95
201; P8BE-NEXT:    mulli r10, r10, 124
202; P8BE-NEXT:    sub r5, r5, r7
203; P8BE-NEXT:    sub r7, r8, r9
204; P8BE-NEXT:    sldi r5, r5, 48
205; P8BE-NEXT:    sub r3, r6, r3
206; P8BE-NEXT:    sub r4, r4, r10
207; P8BE-NEXT:    mtvsrd v2, r5
208; P8BE-NEXT:    sldi r5, r7, 48
209; P8BE-NEXT:    sldi r3, r3, 48
210; P8BE-NEXT:    sldi r4, r4, 48
211; P8BE-NEXT:    mtvsrd v3, r5
212; P8BE-NEXT:    mtvsrd v4, r3
213; P8BE-NEXT:    mtvsrd v5, r4
214; P8BE-NEXT:    vmrghh v2, v3, v2
215; P8BE-NEXT:    vmrghh v3, v4, v5
216; P8BE-NEXT:    vmrghw v2, v3, v2
217; P8BE-NEXT:    blr
  ; IR under test: one vector urem with a distinct constant divisor per lane.
218  %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
219  ret <4 x i16> %1
220}
221
; fold_urem_vec_2: unsigned remainder of each <4 x i16> lane by the same
; constant (splat of 95).
; In the expected output the multiplier constant is materialized once
; (lis 22765 / ori 8969) and reused by the mulhwu of all four lanes. Each lane
; then runs the same sub / srwi 1 / add / srwi 6 sequence, followed by
; mulli 95 and a final sub that yields the remainder. No divide instruction
; appears in any of the four run configurations.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
222define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
223; P9LE-LABEL: fold_urem_vec_2:
224; P9LE:       # %bb.0:
225; P9LE-NEXT:    li r3, 0
226; P9LE-NEXT:    vextuhrx r3, r3, v2
227; P9LE-NEXT:    lis r4, 22765
228; P9LE-NEXT:    ori r4, r4, 8969
229; P9LE-NEXT:    clrlwi r3, r3, 16
230; P9LE-NEXT:    mulhwu r5, r3, r4
231; P9LE-NEXT:    sub r6, r3, r5
232; P9LE-NEXT:    srwi r6, r6, 1
233; P9LE-NEXT:    add r5, r6, r5
234; P9LE-NEXT:    srwi r5, r5, 6
235; P9LE-NEXT:    mulli r5, r5, 95
236; P9LE-NEXT:    sub r3, r3, r5
237; P9LE-NEXT:    mtvsrd v3, r3
238; P9LE-NEXT:    li r3, 2
239; P9LE-NEXT:    vextuhrx r3, r3, v2
240; P9LE-NEXT:    clrlwi r3, r3, 16
241; P9LE-NEXT:    mulhwu r5, r3, r4
242; P9LE-NEXT:    sub r6, r3, r5
243; P9LE-NEXT:    srwi r6, r6, 1
244; P9LE-NEXT:    add r5, r6, r5
245; P9LE-NEXT:    srwi r5, r5, 6
246; P9LE-NEXT:    mulli r5, r5, 95
247; P9LE-NEXT:    sub r3, r3, r5
248; P9LE-NEXT:    mtvsrd v4, r3
249; P9LE-NEXT:    li r3, 4
250; P9LE-NEXT:    vextuhrx r3, r3, v2
251; P9LE-NEXT:    clrlwi r3, r3, 16
252; P9LE-NEXT:    mulhwu r5, r3, r4
253; P9LE-NEXT:    sub r6, r3, r5
254; P9LE-NEXT:    srwi r6, r6, 1
255; P9LE-NEXT:    add r5, r6, r5
256; P9LE-NEXT:    srwi r5, r5, 6
257; P9LE-NEXT:    mulli r5, r5, 95
258; P9LE-NEXT:    sub r3, r3, r5
259; P9LE-NEXT:    vmrghh v3, v4, v3
260; P9LE-NEXT:    mtvsrd v4, r3
261; P9LE-NEXT:    li r3, 6
262; P9LE-NEXT:    vextuhrx r3, r3, v2
263; P9LE-NEXT:    clrlwi r3, r3, 16
264; P9LE-NEXT:    mulhwu r4, r3, r4
265; P9LE-NEXT:    sub r5, r3, r4
266; P9LE-NEXT:    srwi r5, r5, 1
267; P9LE-NEXT:    add r4, r5, r4
268; P9LE-NEXT:    srwi r4, r4, 6
269; P9LE-NEXT:    mulli r4, r4, 95
270; P9LE-NEXT:    sub r3, r3, r4
271; P9LE-NEXT:    mtvsrd v2, r3
272; P9LE-NEXT:    vmrghh v2, v2, v4
273; P9LE-NEXT:    vmrglw v2, v2, v3
274; P9LE-NEXT:    blr
275;
276; P9BE-LABEL: fold_urem_vec_2:
277; P9BE:       # %bb.0:
278; P9BE-NEXT:    li r3, 6
279; P9BE-NEXT:    vextuhlx r3, r3, v2
280; P9BE-NEXT:    lis r4, 22765
281; P9BE-NEXT:    ori r4, r4, 8969
282; P9BE-NEXT:    clrlwi r3, r3, 16
283; P9BE-NEXT:    mulhwu r5, r3, r4
284; P9BE-NEXT:    sub r6, r3, r5
285; P9BE-NEXT:    srwi r6, r6, 1
286; P9BE-NEXT:    add r5, r6, r5
287; P9BE-NEXT:    srwi r5, r5, 6
288; P9BE-NEXT:    mulli r5, r5, 95
289; P9BE-NEXT:    sub r3, r3, r5
290; P9BE-NEXT:    sldi r3, r3, 48
291; P9BE-NEXT:    mtvsrd v3, r3
292; P9BE-NEXT:    li r3, 4
293; P9BE-NEXT:    vextuhlx r3, r3, v2
294; P9BE-NEXT:    clrlwi r3, r3, 16
295; P9BE-NEXT:    mulhwu r5, r3, r4
296; P9BE-NEXT:    sub r6, r3, r5
297; P9BE-NEXT:    srwi r6, r6, 1
298; P9BE-NEXT:    add r5, r6, r5
299; P9BE-NEXT:    srwi r5, r5, 6
300; P9BE-NEXT:    mulli r5, r5, 95
301; P9BE-NEXT:    sub r3, r3, r5
302; P9BE-NEXT:    sldi r3, r3, 48
303; P9BE-NEXT:    mtvsrd v4, r3
304; P9BE-NEXT:    li r3, 2
305; P9BE-NEXT:    vextuhlx r3, r3, v2
306; P9BE-NEXT:    clrlwi r3, r3, 16
307; P9BE-NEXT:    mulhwu r5, r3, r4
308; P9BE-NEXT:    sub r6, r3, r5
309; P9BE-NEXT:    srwi r6, r6, 1
310; P9BE-NEXT:    add r5, r6, r5
311; P9BE-NEXT:    srwi r5, r5, 6
312; P9BE-NEXT:    mulli r5, r5, 95
313; P9BE-NEXT:    sub r3, r3, r5
314; P9BE-NEXT:    sldi r3, r3, 48
315; P9BE-NEXT:    vmrghh v3, v4, v3
316; P9BE-NEXT:    mtvsrd v4, r3
317; P9BE-NEXT:    li r3, 0
318; P9BE-NEXT:    vextuhlx r3, r3, v2
319; P9BE-NEXT:    clrlwi r3, r3, 16
320; P9BE-NEXT:    mulhwu r4, r3, r4
321; P9BE-NEXT:    sub r5, r3, r4
322; P9BE-NEXT:    srwi r5, r5, 1
323; P9BE-NEXT:    add r4, r5, r4
324; P9BE-NEXT:    srwi r4, r4, 6
325; P9BE-NEXT:    mulli r4, r4, 95
326; P9BE-NEXT:    sub r3, r3, r4
327; P9BE-NEXT:    sldi r3, r3, 48
328; P9BE-NEXT:    mtvsrd v2, r3
329; P9BE-NEXT:    vmrghh v2, v2, v4
330; P9BE-NEXT:    vmrghw v2, v2, v3
331; P9BE-NEXT:    blr
332;
333; P8LE-LABEL: fold_urem_vec_2:
334; P8LE:       # %bb.0:
335; P8LE-NEXT:    xxswapd vs0, v2
336; P8LE-NEXT:    lis r3, 22765
337; P8LE-NEXT:    ori r3, r3, 8969
338; P8LE-NEXT:    mffprd r4, f0
339; P8LE-NEXT:    clrldi r5, r4, 48
340; P8LE-NEXT:    rldicl r6, r4, 48, 48
341; P8LE-NEXT:    clrlwi r5, r5, 16
342; P8LE-NEXT:    rldicl r7, r4, 32, 48
343; P8LE-NEXT:    clrlwi r6, r6, 16
344; P8LE-NEXT:    mulhwu r8, r5, r3
345; P8LE-NEXT:    rldicl r4, r4, 16, 48
346; P8LE-NEXT:    clrlwi r7, r7, 16
347; P8LE-NEXT:    mulhwu r9, r6, r3
348; P8LE-NEXT:    clrlwi r4, r4, 16
349; P8LE-NEXT:    mulhwu r10, r7, r3
350; P8LE-NEXT:    mulhwu r3, r4, r3
351; P8LE-NEXT:    sub r11, r5, r8
352; P8LE-NEXT:    sub r12, r6, r9
353; P8LE-NEXT:    srwi r11, r11, 1
354; P8LE-NEXT:    add r8, r11, r8
355; P8LE-NEXT:    sub r11, r7, r10
356; P8LE-NEXT:    srwi r12, r12, 1
357; P8LE-NEXT:    add r9, r12, r9
358; P8LE-NEXT:    sub r12, r4, r3
359; P8LE-NEXT:    srwi r11, r11, 1
360; P8LE-NEXT:    srwi r8, r8, 6
361; P8LE-NEXT:    add r10, r11, r10
362; P8LE-NEXT:    srwi r11, r12, 1
363; P8LE-NEXT:    srwi r9, r9, 6
364; P8LE-NEXT:    add r3, r11, r3
365; P8LE-NEXT:    mulli r8, r8, 95
366; P8LE-NEXT:    srwi r10, r10, 6
367; P8LE-NEXT:    srwi r3, r3, 6
368; P8LE-NEXT:    mulli r9, r9, 95
369; P8LE-NEXT:    mulli r10, r10, 95
370; P8LE-NEXT:    mulli r3, r3, 95
371; P8LE-NEXT:    sub r5, r5, r8
372; P8LE-NEXT:    sub r6, r6, r9
373; P8LE-NEXT:    mtvsrd v2, r5
374; P8LE-NEXT:    sub r5, r7, r10
375; P8LE-NEXT:    sub r3, r4, r3
376; P8LE-NEXT:    mtvsrd v3, r6
377; P8LE-NEXT:    mtvsrd v4, r5
378; P8LE-NEXT:    mtvsrd v5, r3
379; P8LE-NEXT:    vmrghh v2, v3, v2
380; P8LE-NEXT:    vmrghh v3, v5, v4
381; P8LE-NEXT:    vmrglw v2, v3, v2
382; P8LE-NEXT:    blr
383;
384; P8BE-LABEL: fold_urem_vec_2:
385; P8BE:       # %bb.0:
386; P8BE-NEXT:    mfvsrd r4, v2
387; P8BE-NEXT:    lis r3, 22765
388; P8BE-NEXT:    ori r3, r3, 8969
389; P8BE-NEXT:    clrldi r5, r4, 48
390; P8BE-NEXT:    rldicl r6, r4, 48, 48
391; P8BE-NEXT:    clrlwi r5, r5, 16
392; P8BE-NEXT:    rldicl r7, r4, 32, 48
393; P8BE-NEXT:    clrlwi r6, r6, 16
394; P8BE-NEXT:    mulhwu r8, r5, r3
395; P8BE-NEXT:    rldicl r4, r4, 16, 48
396; P8BE-NEXT:    clrlwi r7, r7, 16
397; P8BE-NEXT:    mulhwu r9, r6, r3
398; P8BE-NEXT:    clrlwi r4, r4, 16
399; P8BE-NEXT:    mulhwu r10, r7, r3
400; P8BE-NEXT:    mulhwu r3, r4, r3
401; P8BE-NEXT:    sub r11, r5, r8
402; P8BE-NEXT:    sub r12, r6, r9
403; P8BE-NEXT:    srwi r11, r11, 1
404; P8BE-NEXT:    add r8, r11, r8
405; P8BE-NEXT:    sub r11, r7, r10
406; P8BE-NEXT:    srwi r12, r12, 1
407; P8BE-NEXT:    add r9, r12, r9
408; P8BE-NEXT:    sub r12, r4, r3
409; P8BE-NEXT:    srwi r11, r11, 1
410; P8BE-NEXT:    srwi r8, r8, 6
411; P8BE-NEXT:    add r10, r11, r10
412; P8BE-NEXT:    srwi r11, r12, 1
413; P8BE-NEXT:    srwi r9, r9, 6
414; P8BE-NEXT:    add r3, r11, r3
415; P8BE-NEXT:    srwi r10, r10, 6
416; P8BE-NEXT:    srwi r3, r3, 6
417; P8BE-NEXT:    mulli r8, r8, 95
418; P8BE-NEXT:    mulli r9, r9, 95
419; P8BE-NEXT:    mulli r10, r10, 95
420; P8BE-NEXT:    mulli r3, r3, 95
421; P8BE-NEXT:    sub r5, r5, r8
422; P8BE-NEXT:    sub r6, r6, r9
423; P8BE-NEXT:    sub r7, r7, r10
424; P8BE-NEXT:    sub r3, r4, r3
425; P8BE-NEXT:    sldi r5, r5, 48
426; P8BE-NEXT:    sldi r6, r6, 48
427; P8BE-NEXT:    sldi r4, r7, 48
428; P8BE-NEXT:    mtvsrd v2, r5
429; P8BE-NEXT:    sldi r3, r3, 48
430; P8BE-NEXT:    mtvsrd v3, r6
431; P8BE-NEXT:    mtvsrd v4, r4
432; P8BE-NEXT:    mtvsrd v5, r3
433; P8BE-NEXT:    vmrghh v2, v3, v2
434; P8BE-NEXT:    vmrghh v3, v5, v4
435; P8BE-NEXT:    vmrghw v2, v3, v2
436; P8BE-NEXT:    blr
  ; IR under test: one vector urem whose divisor is the splat constant 95.
437  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
438  ret <4 x i16> %1
439}
440
441
442; Don't fold if we can combine urem with udiv.
; combine_urem_udiv: computes both urem and udiv of the same <4 x i16> input
; by the splat constant 95 and returns their lane-wise sum.
; In the expected output each lane has a single mulhwu and a single quotient
; computation (sub / srwi 1 / add / srwi 6). That quotient register is used
; twice: it feeds mulli 95 + sub to form the remainder, and it is also moved
; into a vector register as the udiv result. The remainder vector and the
; quotient vector are finally summed with vadduhm.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
443define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
444; P9LE-LABEL: combine_urem_udiv:
445; P9LE:       # %bb.0:
446; P9LE-NEXT:    li r3, 0
447; P9LE-NEXT:    vextuhrx r3, r3, v2
448; P9LE-NEXT:    lis r4, 22765
449; P9LE-NEXT:    ori r4, r4, 8969
450; P9LE-NEXT:    clrlwi r3, r3, 16
451; P9LE-NEXT:    mulhwu r5, r3, r4
452; P9LE-NEXT:    sub r6, r3, r5
453; P9LE-NEXT:    srwi r6, r6, 1
454; P9LE-NEXT:    add r5, r6, r5
455; P9LE-NEXT:    srwi r5, r5, 6
456; P9LE-NEXT:    mulli r6, r5, 95
457; P9LE-NEXT:    sub r3, r3, r6
458; P9LE-NEXT:    mtvsrd v3, r3
459; P9LE-NEXT:    li r3, 2
460; P9LE-NEXT:    vextuhrx r3, r3, v2
461; P9LE-NEXT:    clrlwi r6, r3, 16
462; P9LE-NEXT:    mulhwu r7, r6, r4
463; P9LE-NEXT:    sub r6, r6, r7
464; P9LE-NEXT:    srwi r6, r6, 1
465; P9LE-NEXT:    add r6, r6, r7
466; P9LE-NEXT:    srwi r6, r6, 6
467; P9LE-NEXT:    mulli r7, r6, 95
468; P9LE-NEXT:    sub r3, r3, r7
469; P9LE-NEXT:    mtvsrd v4, r3
470; P9LE-NEXT:    li r3, 4
471; P9LE-NEXT:    vextuhrx r3, r3, v2
472; P9LE-NEXT:    clrlwi r7, r3, 16
473; P9LE-NEXT:    mulhwu r8, r7, r4
474; P9LE-NEXT:    sub r7, r7, r8
475; P9LE-NEXT:    srwi r7, r7, 1
476; P9LE-NEXT:    add r7, r7, r8
477; P9LE-NEXT:    srwi r7, r7, 6
478; P9LE-NEXT:    mulli r8, r7, 95
479; P9LE-NEXT:    sub r3, r3, r8
480; P9LE-NEXT:    vmrghh v3, v4, v3
481; P9LE-NEXT:    mtvsrd v4, r3
482; P9LE-NEXT:    li r3, 6
483; P9LE-NEXT:    vextuhrx r3, r3, v2
484; P9LE-NEXT:    clrlwi r8, r3, 16
485; P9LE-NEXT:    mulhwu r4, r8, r4
486; P9LE-NEXT:    sub r8, r8, r4
487; P9LE-NEXT:    srwi r8, r8, 1
488; P9LE-NEXT:    add r4, r8, r4
489; P9LE-NEXT:    srwi r4, r4, 6
490; P9LE-NEXT:    mulli r8, r4, 95
491; P9LE-NEXT:    sub r3, r3, r8
492; P9LE-NEXT:    mtvsrd v2, r3
493; P9LE-NEXT:    vmrghh v2, v2, v4
494; P9LE-NEXT:    mtvsrd v4, r6
495; P9LE-NEXT:    vmrglw v2, v2, v3
496; P9LE-NEXT:    mtvsrd v3, r5
497; P9LE-NEXT:    vmrghh v3, v4, v3
498; P9LE-NEXT:    mtvsrd v4, r7
499; P9LE-NEXT:    mtvsrd v5, r4
500; P9LE-NEXT:    vmrghh v4, v5, v4
501; P9LE-NEXT:    vmrglw v3, v4, v3
502; P9LE-NEXT:    vadduhm v2, v2, v3
503; P9LE-NEXT:    blr
504;
505; P9BE-LABEL: combine_urem_udiv:
506; P9BE:       # %bb.0:
507; P9BE-NEXT:    li r3, 6
508; P9BE-NEXT:    vextuhlx r3, r3, v2
509; P9BE-NEXT:    lis r5, 22765
510; P9BE-NEXT:    ori r5, r5, 8969
511; P9BE-NEXT:    clrlwi r4, r3, 16
512; P9BE-NEXT:    mulhwu r6, r4, r5
513; P9BE-NEXT:    sub r4, r4, r6
514; P9BE-NEXT:    srwi r4, r4, 1
515; P9BE-NEXT:    add r4, r4, r6
516; P9BE-NEXT:    srwi r4, r4, 6
517; P9BE-NEXT:    mulli r6, r4, 95
518; P9BE-NEXT:    sub r3, r3, r6
519; P9BE-NEXT:    sldi r3, r3, 48
520; P9BE-NEXT:    mtvsrd v3, r3
521; P9BE-NEXT:    li r3, 4
522; P9BE-NEXT:    vextuhlx r3, r3, v2
523; P9BE-NEXT:    clrlwi r6, r3, 16
524; P9BE-NEXT:    mulhwu r7, r6, r5
525; P9BE-NEXT:    sub r6, r6, r7
526; P9BE-NEXT:    srwi r6, r6, 1
527; P9BE-NEXT:    add r6, r6, r7
528; P9BE-NEXT:    srwi r6, r6, 6
529; P9BE-NEXT:    mulli r7, r6, 95
530; P9BE-NEXT:    sub r3, r3, r7
531; P9BE-NEXT:    sldi r3, r3, 48
532; P9BE-NEXT:    mtvsrd v4, r3
533; P9BE-NEXT:    li r3, 2
534; P9BE-NEXT:    vextuhlx r3, r3, v2
535; P9BE-NEXT:    clrlwi r7, r3, 16
536; P9BE-NEXT:    mulhwu r8, r7, r5
537; P9BE-NEXT:    sub r7, r7, r8
538; P9BE-NEXT:    srwi r7, r7, 1
539; P9BE-NEXT:    add r7, r7, r8
540; P9BE-NEXT:    srwi r7, r7, 6
541; P9BE-NEXT:    mulli r8, r7, 95
542; P9BE-NEXT:    sub r3, r3, r8
543; P9BE-NEXT:    sldi r3, r3, 48
544; P9BE-NEXT:    vmrghh v3, v4, v3
545; P9BE-NEXT:    mtvsrd v4, r3
546; P9BE-NEXT:    li r3, 0
547; P9BE-NEXT:    vextuhlx r3, r3, v2
548; P9BE-NEXT:    clrlwi r3, r3, 16
549; P9BE-NEXT:    mulhwu r5, r3, r5
550; P9BE-NEXT:    sub r8, r3, r5
551; P9BE-NEXT:    srwi r8, r8, 1
552; P9BE-NEXT:    add r5, r8, r5
553; P9BE-NEXT:    srwi r5, r5, 6
554; P9BE-NEXT:    mulli r8, r5, 95
555; P9BE-NEXT:    sub r3, r3, r8
556; P9BE-NEXT:    sldi r3, r3, 48
557; P9BE-NEXT:    mtvsrd v2, r3
558; P9BE-NEXT:    sldi r3, r4, 48
559; P9BE-NEXT:    vmrghh v2, v2, v4
560; P9BE-NEXT:    vmrghw v2, v2, v3
561; P9BE-NEXT:    mtvsrd v3, r3
562; P9BE-NEXT:    sldi r3, r6, 48
563; P9BE-NEXT:    mtvsrd v4, r3
564; P9BE-NEXT:    sldi r3, r7, 48
565; P9BE-NEXT:    vmrghh v3, v4, v3
566; P9BE-NEXT:    mtvsrd v4, r3
567; P9BE-NEXT:    sldi r3, r5, 48
568; P9BE-NEXT:    mtvsrd v5, r3
569; P9BE-NEXT:    vmrghh v4, v5, v4
570; P9BE-NEXT:    vmrghw v3, v4, v3
571; P9BE-NEXT:    vadduhm v2, v2, v3
572; P9BE-NEXT:    blr
573;
574; P8LE-LABEL: combine_urem_udiv:
575; P8LE:       # %bb.0:
576; P8LE-NEXT:    xxswapd vs0, v2
577; P8LE-NEXT:    lis r3, 22765
578; P8LE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
579; P8LE-NEXT:    ori r3, r3, 8969
580; P8LE-NEXT:    mffprd r4, f0
581; P8LE-NEXT:    clrldi r5, r4, 48
582; P8LE-NEXT:    rldicl r6, r4, 48, 48
583; P8LE-NEXT:    clrlwi r5, r5, 16
584; P8LE-NEXT:    clrlwi r8, r6, 16
585; P8LE-NEXT:    rldicl r7, r4, 32, 48
586; P8LE-NEXT:    rldicl r4, r4, 16, 48
587; P8LE-NEXT:    mulhwu r9, r5, r3
588; P8LE-NEXT:    mulhwu r11, r8, r3
589; P8LE-NEXT:    clrlwi r10, r7, 16
590; P8LE-NEXT:    clrlwi r12, r4, 16
591; P8LE-NEXT:    mulhwu r0, r10, r3
592; P8LE-NEXT:    mulhwu r3, r12, r3
593; P8LE-NEXT:    sub r30, r5, r9
594; P8LE-NEXT:    sub r8, r8, r11
595; P8LE-NEXT:    srwi r30, r30, 1
596; P8LE-NEXT:    srwi r8, r8, 1
597; P8LE-NEXT:    sub r10, r10, r0
598; P8LE-NEXT:    add r9, r30, r9
599; P8LE-NEXT:    add r8, r8, r11
600; P8LE-NEXT:    sub r11, r12, r3
601; P8LE-NEXT:    srwi r10, r10, 1
602; P8LE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
603; P8LE-NEXT:    srwi r9, r9, 6
604; P8LE-NEXT:    srwi r11, r11, 1
605; P8LE-NEXT:    srwi r8, r8, 6
606; P8LE-NEXT:    add r10, r10, r0
607; P8LE-NEXT:    mulli r12, r9, 95
608; P8LE-NEXT:    add r3, r11, r3
609; P8LE-NEXT:    mtvsrd v2, r9
610; P8LE-NEXT:    srwi r10, r10, 6
611; P8LE-NEXT:    mulli r9, r8, 95
612; P8LE-NEXT:    srwi r3, r3, 6
613; P8LE-NEXT:    mtvsrd v3, r8
614; P8LE-NEXT:    mulli r8, r10, 95
615; P8LE-NEXT:    mtvsrd v4, r10
616; P8LE-NEXT:    mulli r10, r3, 95
617; P8LE-NEXT:    vmrghh v2, v3, v2
618; P8LE-NEXT:    sub r5, r5, r12
619; P8LE-NEXT:    sub r6, r6, r9
620; P8LE-NEXT:    mtvsrd v3, r5
621; P8LE-NEXT:    mtvsrd v5, r6
622; P8LE-NEXT:    sub r5, r7, r8
623; P8LE-NEXT:    sub r4, r4, r10
624; P8LE-NEXT:    mtvsrd v0, r5
625; P8LE-NEXT:    mtvsrd v1, r4
626; P8LE-NEXT:    vmrghh v3, v5, v3
627; P8LE-NEXT:    mtvsrd v5, r3
628; P8LE-NEXT:    vmrghh v0, v1, v0
629; P8LE-NEXT:    vmrghh v4, v5, v4
630; P8LE-NEXT:    vmrglw v3, v0, v3
631; P8LE-NEXT:    vmrglw v2, v4, v2
632; P8LE-NEXT:    vadduhm v2, v3, v2
633; P8LE-NEXT:    blr
634;
635; P8BE-LABEL: combine_urem_udiv:
636; P8BE:       # %bb.0:
637; P8BE-NEXT:    mfvsrd r5, v2
638; P8BE-NEXT:    lis r4, 22765
639; P8BE-NEXT:    ori r4, r4, 8969
640; P8BE-NEXT:    clrldi r3, r5, 48
641; P8BE-NEXT:    rldicl r6, r5, 48, 48
642; P8BE-NEXT:    clrlwi r8, r3, 16
643; P8BE-NEXT:    rldicl r7, r5, 32, 48
644; P8BE-NEXT:    clrlwi r9, r6, 16
645; P8BE-NEXT:    rldicl r5, r5, 16, 48
646; P8BE-NEXT:    mulhwu r10, r8, r4
647; P8BE-NEXT:    clrlwi r11, r7, 16
648; P8BE-NEXT:    mulhwu r12, r9, r4
649; P8BE-NEXT:    clrlwi r5, r5, 16
650; P8BE-NEXT:    mulhwu r0, r11, r4
651; P8BE-NEXT:    mulhwu r4, r5, r4
652; P8BE-NEXT:    sub r8, r8, r10
653; P8BE-NEXT:    sub r9, r9, r12
654; P8BE-NEXT:    srwi r8, r8, 1
655; P8BE-NEXT:    add r8, r8, r10
656; P8BE-NEXT:    sub r10, r11, r0
657; P8BE-NEXT:    srwi r9, r9, 1
658; P8BE-NEXT:    sub r11, r5, r4
659; P8BE-NEXT:    add r9, r9, r12
660; P8BE-NEXT:    srwi r8, r8, 6
661; P8BE-NEXT:    srwi r11, r11, 1
662; P8BE-NEXT:    srwi r10, r10, 1
663; P8BE-NEXT:    srwi r9, r9, 6
664; P8BE-NEXT:    mulli r12, r8, 95
665; P8BE-NEXT:    add r4, r11, r4
666; P8BE-NEXT:    add r10, r10, r0
667; P8BE-NEXT:    mulli r11, r9, 95
668; P8BE-NEXT:    srwi r4, r4, 6
669; P8BE-NEXT:    srwi r10, r10, 6
670; P8BE-NEXT:    sldi r9, r9, 48
671; P8BE-NEXT:    sldi r8, r8, 48
672; P8BE-NEXT:    mtvsrd v3, r9
673; P8BE-NEXT:    mulli r9, r4, 95
674; P8BE-NEXT:    mtvsrd v2, r8
675; P8BE-NEXT:    mulli r8, r10, 95
676; P8BE-NEXT:    sub r3, r3, r12
677; P8BE-NEXT:    sub r6, r6, r11
678; P8BE-NEXT:    sldi r3, r3, 48
679; P8BE-NEXT:    vmrghh v2, v3, v2
680; P8BE-NEXT:    sldi r6, r6, 48
681; P8BE-NEXT:    sldi r10, r10, 48
682; P8BE-NEXT:    mtvsrd v3, r3
683; P8BE-NEXT:    sub r3, r5, r9
684; P8BE-NEXT:    sub r7, r7, r8
685; P8BE-NEXT:    mtvsrd v5, r6
686; P8BE-NEXT:    sldi r3, r3, 48
687; P8BE-NEXT:    sldi r5, r7, 48
688; P8BE-NEXT:    mtvsrd v1, r3
689; P8BE-NEXT:    sldi r3, r4, 48
690; P8BE-NEXT:    mtvsrd v4, r10
691; P8BE-NEXT:    mtvsrd v0, r5
692; P8BE-NEXT:    vmrghh v3, v5, v3
693; P8BE-NEXT:    mtvsrd v5, r3
694; P8BE-NEXT:    vmrghh v0, v1, v0
695; P8BE-NEXT:    vmrghh v4, v5, v4
696; P8BE-NEXT:    vmrghw v3, v0, v3
697; P8BE-NEXT:    vmrghw v2, v4, v2
698; P8BE-NEXT:    vadduhm v2, v3, v2
699; P8BE-NEXT:    blr
  ; IR under test: urem and udiv share the operand %x and the divisor splat 95,
  ; and both results are consumed by the add.
700  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
701  %2 = udiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
702  %3 = add <4 x i16> %1, %2
703  ret <4 x i16> %3
704}
705
706; Don't fold for divisors that are a power of two.
; dont_fold_urem_power_of_two: unsigned remainder of a <4 x i16> by the
; constants 64, 32, 8 and 95.
; In the expected output the three power-of-two lanes need no multiply: each
; is reduced with a single clrlwi (26, 27 or 29 high bits cleared, i.e. the
; low 6, 5 or 3 bits are kept). Only the lane divided by 95 uses the
; mulhwu / shift / mulli 95 / sub sequence.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
707define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
708; P9LE-LABEL: dont_fold_urem_power_of_two:
709; P9LE:       # %bb.0:
710; P9LE-NEXT:    li r3, 0
711; P9LE-NEXT:    vextuhrx r3, r3, v2
712; P9LE-NEXT:    clrlwi r3, r3, 26
713; P9LE-NEXT:    mtvsrd v3, r3
714; P9LE-NEXT:    li r3, 2
715; P9LE-NEXT:    vextuhrx r3, r3, v2
716; P9LE-NEXT:    clrlwi r3, r3, 27
717; P9LE-NEXT:    mtvsrd v4, r3
718; P9LE-NEXT:    li r3, 6
719; P9LE-NEXT:    vextuhrx r3, r3, v2
720; P9LE-NEXT:    lis r4, 22765
721; P9LE-NEXT:    ori r4, r4, 8969
722; P9LE-NEXT:    vmrghh v3, v4, v3
723; P9LE-NEXT:    clrlwi r3, r3, 16
724; P9LE-NEXT:    mulhwu r4, r3, r4
725; P9LE-NEXT:    sub r5, r3, r4
726; P9LE-NEXT:    srwi r5, r5, 1
727; P9LE-NEXT:    add r4, r5, r4
728; P9LE-NEXT:    srwi r4, r4, 6
729; P9LE-NEXT:    mulli r4, r4, 95
730; P9LE-NEXT:    sub r3, r3, r4
731; P9LE-NEXT:    mtvsrd v4, r3
732; P9LE-NEXT:    li r3, 4
733; P9LE-NEXT:    vextuhrx r3, r3, v2
734; P9LE-NEXT:    clrlwi r3, r3, 29
735; P9LE-NEXT:    mtvsrd v2, r3
736; P9LE-NEXT:    vmrghh v2, v4, v2
737; P9LE-NEXT:    vmrglw v2, v2, v3
738; P9LE-NEXT:    blr
739;
740; P9BE-LABEL: dont_fold_urem_power_of_two:
741; P9BE:       # %bb.0:
742; P9BE-NEXT:    li r3, 2
743; P9BE-NEXT:    vextuhlx r3, r3, v2
744; P9BE-NEXT:    clrlwi r3, r3, 27
745; P9BE-NEXT:    sldi r3, r3, 48
746; P9BE-NEXT:    mtvsrd v3, r3
747; P9BE-NEXT:    li r3, 0
748; P9BE-NEXT:    vextuhlx r3, r3, v2
749; P9BE-NEXT:    clrlwi r3, r3, 26
750; P9BE-NEXT:    sldi r3, r3, 48
751; P9BE-NEXT:    mtvsrd v4, r3
752; P9BE-NEXT:    li r3, 6
753; P9BE-NEXT:    vextuhlx r3, r3, v2
754; P9BE-NEXT:    lis r4, 22765
755; P9BE-NEXT:    ori r4, r4, 8969
756; P9BE-NEXT:    vmrghh v3, v4, v3
757; P9BE-NEXT:    clrlwi r3, r3, 16
758; P9BE-NEXT:    mulhwu r4, r3, r4
759; P9BE-NEXT:    sub r5, r3, r4
760; P9BE-NEXT:    srwi r5, r5, 1
761; P9BE-NEXT:    add r4, r5, r4
762; P9BE-NEXT:    srwi r4, r4, 6
763; P9BE-NEXT:    mulli r4, r4, 95
764; P9BE-NEXT:    sub r3, r3, r4
765; P9BE-NEXT:    sldi r3, r3, 48
766; P9BE-NEXT:    mtvsrd v4, r3
767; P9BE-NEXT:    li r3, 4
768; P9BE-NEXT:    vextuhlx r3, r3, v2
769; P9BE-NEXT:    clrlwi r3, r3, 29
770; P9BE-NEXT:    sldi r3, r3, 48
771; P9BE-NEXT:    mtvsrd v2, r3
772; P9BE-NEXT:    vmrghh v2, v2, v4
773; P9BE-NEXT:    vmrghw v2, v3, v2
774; P9BE-NEXT:    blr
775;
776; P8LE-LABEL: dont_fold_urem_power_of_two:
777; P8LE:       # %bb.0:
778; P8LE-NEXT:    xxswapd vs0, v2
779; P8LE-NEXT:    lis r3, 22765
780; P8LE-NEXT:    ori r3, r3, 8969
781; P8LE-NEXT:    mffprd r4, f0
782; P8LE-NEXT:    rldicl r5, r4, 16, 48
783; P8LE-NEXT:    rldicl r7, r4, 48, 48
784; P8LE-NEXT:    clrlwi r5, r5, 16
785; P8LE-NEXT:    mulhwu r3, r5, r3
786; P8LE-NEXT:    sub r6, r5, r3
787; P8LE-NEXT:    srwi r6, r6, 1
788; P8LE-NEXT:    add r3, r6, r3
789; P8LE-NEXT:    clrldi r6, r4, 48
790; P8LE-NEXT:    srwi r3, r3, 6
791; P8LE-NEXT:    clrlwi r6, r6, 26
792; P8LE-NEXT:    mulli r3, r3, 95
793; P8LE-NEXT:    rldicl r4, r4, 32, 48
794; P8LE-NEXT:    mtvsrd v2, r6
795; P8LE-NEXT:    clrlwi r6, r7, 27
796; P8LE-NEXT:    clrlwi r4, r4, 29
797; P8LE-NEXT:    mtvsrd v3, r6
798; P8LE-NEXT:    mtvsrd v5, r4
799; P8LE-NEXT:    vmrghh v2, v3, v2
800; P8LE-NEXT:    sub r3, r5, r3
801; P8LE-NEXT:    mtvsrd v4, r3
802; P8LE-NEXT:    vmrghh v3, v4, v5
803; P8LE-NEXT:    vmrglw v2, v3, v2
804; P8LE-NEXT:    blr
805;
806; P8BE-LABEL: dont_fold_urem_power_of_two:
807; P8BE:       # %bb.0:
808; P8BE-NEXT:    mfvsrd r4, v2
809; P8BE-NEXT:    lis r3, 22765
810; P8BE-NEXT:    ori r3, r3, 8969
811; P8BE-NEXT:    clrldi r5, r4, 48
812; P8BE-NEXT:    rldicl r7, r4, 16, 48
813; P8BE-NEXT:    clrlwi r5, r5, 16
814; P8BE-NEXT:    clrlwi r7, r7, 26
815; P8BE-NEXT:    mulhwu r3, r5, r3
816; P8BE-NEXT:    sub r6, r5, r3
817; P8BE-NEXT:    srwi r6, r6, 1
818; P8BE-NEXT:    add r3, r6, r3
819; P8BE-NEXT:    rldicl r6, r4, 32, 48
820; P8BE-NEXT:    srwi r3, r3, 6
821; P8BE-NEXT:    rldicl r4, r4, 48, 48
822; P8BE-NEXT:    clrlwi r6, r6, 27
823; P8BE-NEXT:    mulli r3, r3, 95
824; P8BE-NEXT:    sldi r6, r6, 48
825; P8BE-NEXT:    clrlwi r4, r4, 29
826; P8BE-NEXT:    mtvsrd v2, r6
827; P8BE-NEXT:    sldi r6, r7, 48
828; P8BE-NEXT:    sldi r4, r4, 48
829; P8BE-NEXT:    mtvsrd v3, r6
830; P8BE-NEXT:    mtvsrd v5, r4
831; P8BE-NEXT:    sub r3, r5, r3
832; P8BE-NEXT:    vmrghh v2, v3, v2
833; P8BE-NEXT:    sldi r3, r3, 48
834; P8BE-NEXT:    mtvsrd v4, r3
835; P8BE-NEXT:    vmrghh v3, v5, v4
836; P8BE-NEXT:    vmrghw v2, v2, v3
837; P8BE-NEXT:    blr
  ; IR under test: three power-of-two divisors plus one non-power-of-two (95).
838  %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
839  ret <4 x i16> %1
840}
841
842; Don't fold if the divisor is one.
843define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
844; P9LE-LABEL: dont_fold_urem_one:
845; P9LE:       # %bb.0:
846; P9LE-NEXT:    li r3, 4
847; P9LE-NEXT:    vextuhrx r3, r3, v2
848; P9LE-NEXT:    lis r4, -19946
849; P9LE-NEXT:    ori r4, r4, 17097
850; P9LE-NEXT:    clrlwi r3, r3, 16
851; P9LE-NEXT:    mulhwu r4, r3, r4
852; P9LE-NEXT:    srwi r4, r4, 4
853; P9LE-NEXT:    mulli r4, r4, 23
854; P9LE-NEXT:    sub r3, r3, r4
855; P9LE-NEXT:    lis r4, 24749
856; P9LE-NEXT:    mtvsrd v3, r3
857; P9LE-NEXT:    li r3, 6
858; P9LE-NEXT:    vextuhrx r3, r3, v2
859; P9LE-NEXT:    clrlwi r3, r3, 16
860; P9LE-NEXT:    ori r4, r4, 47143
861; P9LE-NEXT:    mulhwu r4, r3, r4
862; P9LE-NEXT:    srwi r4, r4, 11
863; P9LE-NEXT:    mulli r4, r4, 5423
864; P9LE-NEXT:    sub r3, r3, r4
865; P9LE-NEXT:    mtvsrd v4, r3
866; P9LE-NEXT:    li r3, 2
867; P9LE-NEXT:    vextuhrx r3, r3, v2
868; P9LE-NEXT:    lis r5, -14230
869; P9LE-NEXT:    ori r5, r5, 30865
870; P9LE-NEXT:    vmrghh v3, v4, v3
871; P9LE-NEXT:    clrlwi r4, r3, 16
872; P9LE-NEXT:    rlwinm r3, r3, 31, 17, 31
873; P9LE-NEXT:    mulhwu r3, r3, r5
874; P9LE-NEXT:    srwi r3, r3, 8
875; P9LE-NEXT:    mulli r3, r3, 654
876; P9LE-NEXT:    sub r3, r4, r3
877; P9LE-NEXT:    mtvsrd v2, r3
878; P9LE-NEXT:    li r3, 0
879; P9LE-NEXT:    mtvsrd v4, r3
880; P9LE-NEXT:    vmrghh v2, v2, v4
881; P9LE-NEXT:    vmrglw v2, v3, v2
882; P9LE-NEXT:    blr
883;
884; P9BE-LABEL: dont_fold_urem_one:
885; P9BE:       # %bb.0:
886; P9BE-NEXT:    li r3, 6
887; P9BE-NEXT:    vextuhlx r3, r3, v2
888; P9BE-NEXT:    lis r4, 24749
889; P9BE-NEXT:    ori r4, r4, 47143
890; P9BE-NEXT:    clrlwi r3, r3, 16
891; P9BE-NEXT:    mulhwu r4, r3, r4
892; P9BE-NEXT:    srwi r4, r4, 11
893; P9BE-NEXT:    mulli r4, r4, 5423
894; P9BE-NEXT:    sub r3, r3, r4
895; P9BE-NEXT:    lis r4, -19946
896; P9BE-NEXT:    sldi r3, r3, 48
897; P9BE-NEXT:    mtvsrd v3, r3
898; P9BE-NEXT:    li r3, 4
899; P9BE-NEXT:    vextuhlx r3, r3, v2
900; P9BE-NEXT:    clrlwi r3, r3, 16
901; P9BE-NEXT:    ori r4, r4, 17097
902; P9BE-NEXT:    mulhwu r4, r3, r4
903; P9BE-NEXT:    srwi r4, r4, 4
904; P9BE-NEXT:    mulli r4, r4, 23
905; P9BE-NEXT:    sub r3, r3, r4
906; P9BE-NEXT:    sldi r3, r3, 48
907; P9BE-NEXT:    mtvsrd v4, r3
908; P9BE-NEXT:    li r3, 2
909; P9BE-NEXT:    vextuhlx r3, r3, v2
910; P9BE-NEXT:    lis r5, -14230
911; P9BE-NEXT:    ori r5, r5, 30865
912; P9BE-NEXT:    vmrghh v3, v4, v3
913; P9BE-NEXT:    clrlwi r4, r3, 16
914; P9BE-NEXT:    rlwinm r3, r3, 31, 17, 31
915; P9BE-NEXT:    mulhwu r3, r3, r5
916; P9BE-NEXT:    srwi r3, r3, 8
917; P9BE-NEXT:    mulli r3, r3, 654
918; P9BE-NEXT:    sub r3, r4, r3
919; P9BE-NEXT:    sldi r3, r3, 48
920; P9BE-NEXT:    mtvsrd v2, r3
921; P9BE-NEXT:    li r3, 0
922; P9BE-NEXT:    sldi r3, r3, 48
923; P9BE-NEXT:    mtvsrd v4, r3
924; P9BE-NEXT:    vmrghh v2, v4, v2
925; P9BE-NEXT:    vmrghw v2, v2, v3
926; P9BE-NEXT:    blr
927;
928; P8LE-LABEL: dont_fold_urem_one:
929; P8LE:       # %bb.0:
930; P8LE-NEXT:    xxswapd vs0, v2
931; P8LE-NEXT:    lis r3, -14230
932; P8LE-NEXT:    lis r7, -19946
933; P8LE-NEXT:    lis r9, 24749
934; P8LE-NEXT:    ori r3, r3, 30865
935; P8LE-NEXT:    ori r7, r7, 17097
936; P8LE-NEXT:    mffprd r4, f0
937; P8LE-NEXT:    rldicl r5, r4, 48, 48
938; P8LE-NEXT:    rldicl r6, r4, 32, 48
939; P8LE-NEXT:    rldicl r4, r4, 16, 48
940; P8LE-NEXT:    rlwinm r8, r5, 31, 17, 31
941; P8LE-NEXT:    clrlwi r6, r6, 16
942; P8LE-NEXT:    clrlwi r5, r5, 16
943; P8LE-NEXT:    mulhwu r3, r8, r3
944; P8LE-NEXT:    ori r8, r9, 47143
945; P8LE-NEXT:    clrlwi r4, r4, 16
946; P8LE-NEXT:    li r9, 0
947; P8LE-NEXT:    mulhwu r7, r6, r7
948; P8LE-NEXT:    mulhwu r8, r4, r8
949; P8LE-NEXT:    mtvsrd v2, r9
950; P8LE-NEXT:    srwi r3, r3, 8
951; P8LE-NEXT:    srwi r7, r7, 4
952; P8LE-NEXT:    mulli r3, r3, 654
953; P8LE-NEXT:    srwi r8, r8, 11
954; P8LE-NEXT:    mulli r7, r7, 23
955; P8LE-NEXT:    mulli r8, r8, 5423
956; P8LE-NEXT:    sub r3, r5, r3
957; P8LE-NEXT:    sub r5, r6, r7
958; P8LE-NEXT:    mtvsrd v3, r3
959; P8LE-NEXT:    sub r3, r4, r8
960; P8LE-NEXT:    mtvsrd v4, r5
961; P8LE-NEXT:    mtvsrd v5, r3
962; P8LE-NEXT:    vmrghh v2, v3, v2
963; P8LE-NEXT:    vmrghh v3, v5, v4
964; P8LE-NEXT:    vmrglw v2, v3, v2
965; P8LE-NEXT:    blr
966;
967; P8BE-LABEL: dont_fold_urem_one:
968; P8BE:       # %bb.0:
969; P8BE-NEXT:    mfvsrd r4, v2
970; P8BE-NEXT:    lis r3, 24749
971; P8BE-NEXT:    lis r7, -19946
972; P8BE-NEXT:    lis r8, -14230
973; P8BE-NEXT:    ori r3, r3, 47143
974; P8BE-NEXT:    ori r7, r7, 17097
975; P8BE-NEXT:    ori r8, r8, 30865
976; P8BE-NEXT:    clrldi r5, r4, 48
977; P8BE-NEXT:    rldicl r6, r4, 48, 48
978; P8BE-NEXT:    rldicl r4, r4, 32, 48
979; P8BE-NEXT:    clrlwi r5, r5, 16
980; P8BE-NEXT:    clrlwi r6, r6, 16
981; P8BE-NEXT:    mulhwu r3, r5, r3
982; P8BE-NEXT:    rlwinm r9, r4, 31, 17, 31
983; P8BE-NEXT:    clrlwi r4, r4, 16
984; P8BE-NEXT:    mulhwu r7, r6, r7
985; P8BE-NEXT:    mulhwu r8, r9, r8
986; P8BE-NEXT:    li r9, 0
987; P8BE-NEXT:    srwi r3, r3, 11
988; P8BE-NEXT:    srwi r7, r7, 4
989; P8BE-NEXT:    mulli r3, r3, 5423
990; P8BE-NEXT:    srwi r8, r8, 8
991; P8BE-NEXT:    mulli r7, r7, 23
992; P8BE-NEXT:    mulli r8, r8, 654
993; P8BE-NEXT:    sub r3, r5, r3
994; P8BE-NEXT:    sldi r5, r9, 48
995; P8BE-NEXT:    mtvsrd v2, r5
996; P8BE-NEXT:    sub r5, r6, r7
997; P8BE-NEXT:    sldi r3, r3, 48
998; P8BE-NEXT:    sub r4, r4, r8
999; P8BE-NEXT:    sldi r5, r5, 48
1000; P8BE-NEXT:    mtvsrd v3, r3
1001; P8BE-NEXT:    sldi r3, r4, 48
1002; P8BE-NEXT:    mtvsrd v4, r5
1003; P8BE-NEXT:    mtvsrd v5, r3
1004; P8BE-NEXT:    vmrghh v3, v4, v3
1005; P8BE-NEXT:    vmrghh v2, v2, v5
1006; P8BE-NEXT:    vmrghw v2, v2, v3
1007; P8BE-NEXT:    blr
1008  %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
1009  ret <4 x i16> %1
1010}
1011
1012; Don't fold if the divisor is 2^16.
; NOTE(review): 2^16 (65536) is not representable in i16. If the IR parser
; truncates the literal, the second divisor lane becomes 0, and urem by zero
; is undefined behavior - that would explain why the expected output for all
; four RUN configurations is a bare `blr`. Confirm this is the coverage that
; was intended. The "smax" in the function name also does not match the
; constant used (i16 smax would be 32767).
1013define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
1014; CHECK-LABEL: dont_fold_urem_i16_smax:
1015; CHECK:       # %bb.0:
1016; CHECK-NEXT:    blr
1017  %1 = urem <4 x i16> %x, <i16 1, i16 65536, i16 23, i16 5423>
1018  ret <4 x i16> %1
1019}
1020
1021; Don't fold i64 urem.
; The divisor vector is <1, 654, 23, 5423>. In the expected output below, the
; three lanes with divisors 654, 23 and 5423 are each computed separately in
; GPRs: a 64-bit constant is materialized (lis/ori/sldi/oris/ori), multiplied
; with mulhdu, adjusted with rldicl shifts (plus a sub/add step for the
; divisor-23 lane), multiplied by the lane's divisor with mulli, and finally
; subtracted from the original element. The lane whose divisor is 1 is emitted
; as a literal 0 (via `li rN, 0`, or a 0 operand to mtvsrdd on P9 big-endian).
1022define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) {
1023; P9LE-LABEL: dont_fold_urem_i64:
1024; P9LE:       # %bb.0:
1025; P9LE-NEXT:    lis r4, 25644
1026; P9LE-NEXT:    ori r4, r4, 34192
1027; P9LE-NEXT:    sldi r4, r4, 32
1028; P9LE-NEXT:    oris r4, r4, 45590
1029; P9LE-NEXT:    mfvsrld r3, v3
1030; P9LE-NEXT:    ori r4, r4, 17097
1031; P9LE-NEXT:    mulhdu r4, r3, r4
1032; P9LE-NEXT:    sub r5, r3, r4
1033; P9LE-NEXT:    rldicl r5, r5, 63, 1
1034; P9LE-NEXT:    add r4, r5, r4
1035; P9LE-NEXT:    lis r5, -16037
1036; P9LE-NEXT:    rldicl r4, r4, 60, 4
1037; P9LE-NEXT:    ori r5, r5, 28749
1038; P9LE-NEXT:    mulli r4, r4, 23
1039; P9LE-NEXT:    sldi r5, r5, 32
1040; P9LE-NEXT:    oris r5, r5, 52170
1041; P9LE-NEXT:    ori r5, r5, 12109
1042; P9LE-NEXT:    sub r3, r3, r4
1043; P9LE-NEXT:    mfvsrd r4, v3
1044; P9LE-NEXT:    mulhdu r5, r4, r5
1045; P9LE-NEXT:    rldicl r5, r5, 52, 12
1046; P9LE-NEXT:    mulli r5, r5, 5423
1047; P9LE-NEXT:    sub r4, r4, r5
1048; P9LE-NEXT:    lis r5, 25653
1049; P9LE-NEXT:    ori r5, r5, 15432
1050; P9LE-NEXT:    sldi r5, r5, 32
1051; P9LE-NEXT:    mtvsrdd v3, r4, r3
1052; P9LE-NEXT:    mfvsrd r3, v2
1053; P9LE-NEXT:    rldicl r4, r3, 63, 1
1054; P9LE-NEXT:    oris r5, r5, 1603
1055; P9LE-NEXT:    ori r5, r5, 21445
1056; P9LE-NEXT:    mulhdu r4, r4, r5
1057; P9LE-NEXT:    rldicl r4, r4, 57, 7
1058; P9LE-NEXT:    mulli r4, r4, 654
1059; P9LE-NEXT:    sub r3, r3, r4
1060; P9LE-NEXT:    li r4, 0
1061; P9LE-NEXT:    mtvsrdd v2, r3, r4
1062; P9LE-NEXT:    blr
1063;
1064; P9BE-LABEL: dont_fold_urem_i64:
1065; P9BE:       # %bb.0:
1066; P9BE-NEXT:    lis r4, 25644
1067; P9BE-NEXT:    ori r4, r4, 34192
1068; P9BE-NEXT:    sldi r4, r4, 32
1069; P9BE-NEXT:    oris r4, r4, 45590
1070; P9BE-NEXT:    mfvsrd r3, v3
1071; P9BE-NEXT:    ori r4, r4, 17097
1072; P9BE-NEXT:    mulhdu r4, r3, r4
1073; P9BE-NEXT:    sub r5, r3, r4
1074; P9BE-NEXT:    rldicl r5, r5, 63, 1
1075; P9BE-NEXT:    add r4, r5, r4
1076; P9BE-NEXT:    lis r5, -16037
1077; P9BE-NEXT:    rldicl r4, r4, 60, 4
1078; P9BE-NEXT:    mulli r4, r4, 23
1079; P9BE-NEXT:    ori r5, r5, 28749
1080; P9BE-NEXT:    sldi r5, r5, 32
1081; P9BE-NEXT:    oris r5, r5, 52170
1082; P9BE-NEXT:    ori r5, r5, 12109
1083; P9BE-NEXT:    sub r3, r3, r4
1084; P9BE-NEXT:    mfvsrld r4, v3
1085; P9BE-NEXT:    mulhdu r5, r4, r5
1086; P9BE-NEXT:    rldicl r5, r5, 52, 12
1087; P9BE-NEXT:    mulli r5, r5, 5423
1088; P9BE-NEXT:    sub r4, r4, r5
1089; P9BE-NEXT:    lis r5, 25653
1090; P9BE-NEXT:    ori r5, r5, 15432
1091; P9BE-NEXT:    sldi r5, r5, 32
1092; P9BE-NEXT:    mtvsrdd v3, r3, r4
1093; P9BE-NEXT:    mfvsrld r3, v2
1094; P9BE-NEXT:    rldicl r4, r3, 63, 1
1095; P9BE-NEXT:    oris r5, r5, 1603
1096; P9BE-NEXT:    ori r5, r5, 21445
1097; P9BE-NEXT:    mulhdu r4, r4, r5
1098; P9BE-NEXT:    rldicl r4, r4, 57, 7
1099; P9BE-NEXT:    mulli r4, r4, 654
1100; P9BE-NEXT:    sub r3, r3, r4
1101; P9BE-NEXT:    mtvsrdd v2, 0, r3
1102; P9BE-NEXT:    blr
1103;
1104; P8LE-LABEL: dont_fold_urem_i64:
1105; P8LE:       # %bb.0:
1106; P8LE-NEXT:    lis r3, 25644
1107; P8LE-NEXT:    xxswapd vs0, v3
1108; P8LE-NEXT:    lis r4, -16037
1109; P8LE-NEXT:    lis r5, 25653
1110; P8LE-NEXT:    mfvsrd r6, v2
1111; P8LE-NEXT:    ori r3, r3, 34192
1112; P8LE-NEXT:    ori r4, r4, 28749
1113; P8LE-NEXT:    ori r5, r5, 15432
1114; P8LE-NEXT:    mfvsrd r8, v3
1115; P8LE-NEXT:    sldi r3, r3, 32
1116; P8LE-NEXT:    sldi r4, r4, 32
1117; P8LE-NEXT:    oris r3, r3, 45590
1118; P8LE-NEXT:    mffprd r7, f0
1119; P8LE-NEXT:    sldi r5, r5, 32
1120; P8LE-NEXT:    oris r4, r4, 52170
1121; P8LE-NEXT:    ori r3, r3, 17097
1122; P8LE-NEXT:    oris r5, r5, 1603
1123; P8LE-NEXT:    ori r4, r4, 12109
1124; P8LE-NEXT:    mulhdu r3, r7, r3
1125; P8LE-NEXT:    rldicl r9, r6, 63, 1
1126; P8LE-NEXT:    ori r5, r5, 21445
1127; P8LE-NEXT:    mulhdu r4, r8, r4
1128; P8LE-NEXT:    mulhdu r5, r9, r5
1129; P8LE-NEXT:    sub r9, r7, r3
1130; P8LE-NEXT:    rldicl r9, r9, 63, 1
1131; P8LE-NEXT:    rldicl r4, r4, 52, 12
1132; P8LE-NEXT:    add r3, r9, r3
1133; P8LE-NEXT:    rldicl r5, r5, 57, 7
1134; P8LE-NEXT:    mulli r4, r4, 5423
1135; P8LE-NEXT:    rldicl r3, r3, 60, 4
1136; P8LE-NEXT:    mulli r5, r5, 654
1137; P8LE-NEXT:    mulli r3, r3, 23
1138; P8LE-NEXT:    sub r4, r8, r4
1139; P8LE-NEXT:    sub r5, r6, r5
1140; P8LE-NEXT:    mtfprd f0, r4
1141; P8LE-NEXT:    sub r3, r7, r3
1142; P8LE-NEXT:    li r4, 0
1143; P8LE-NEXT:    mtfprd f1, r5
1144; P8LE-NEXT:    mtfprd f2, r3
1145; P8LE-NEXT:    mtfprd f3, r4
1146; P8LE-NEXT:    xxmrghd v3, vs0, vs2
1147; P8LE-NEXT:    xxmrghd v2, vs1, vs3
1148; P8LE-NEXT:    blr
1149;
1150; P8BE-LABEL: dont_fold_urem_i64:
1151; P8BE:       # %bb.0:
1152; P8BE-NEXT:    lis r3, 25644
1153; P8BE-NEXT:    lis r4, -16037
1154; P8BE-NEXT:    xxswapd vs0, v3
1155; P8BE-NEXT:    xxswapd vs1, v2
1156; P8BE-NEXT:    lis r5, 25653
1157; P8BE-NEXT:    ori r3, r3, 34192
1158; P8BE-NEXT:    ori r4, r4, 28749
1159; P8BE-NEXT:    mfvsrd r6, v3
1160; P8BE-NEXT:    ori r5, r5, 15432
1161; P8BE-NEXT:    sldi r3, r3, 32
1162; P8BE-NEXT:    sldi r4, r4, 32
1163; P8BE-NEXT:    oris r3, r3, 45590
1164; P8BE-NEXT:    sldi r5, r5, 32
1165; P8BE-NEXT:    mffprd r7, f0
1166; P8BE-NEXT:    oris r4, r4, 52170
1167; P8BE-NEXT:    ori r3, r3, 17097
1168; P8BE-NEXT:    mffprd r8, f1
1169; P8BE-NEXT:    oris r5, r5, 1603
1170; P8BE-NEXT:    ori r4, r4, 12109
1171; P8BE-NEXT:    mulhdu r3, r6, r3
1172; P8BE-NEXT:    ori r5, r5, 21445
1173; P8BE-NEXT:    mulhdu r4, r7, r4
1174; P8BE-NEXT:    rldicl r9, r8, 63, 1
1175; P8BE-NEXT:    mulhdu r5, r9, r5
1176; P8BE-NEXT:    sub r9, r6, r3
1177; P8BE-NEXT:    rldicl r9, r9, 63, 1
1178; P8BE-NEXT:    rldicl r4, r4, 52, 12
1179; P8BE-NEXT:    add r3, r9, r3
1180; P8BE-NEXT:    mulli r4, r4, 5423
1181; P8BE-NEXT:    rldicl r5, r5, 57, 7
1182; P8BE-NEXT:    rldicl r3, r3, 60, 4
1183; P8BE-NEXT:    mulli r5, r5, 654
1184; P8BE-NEXT:    mulli r3, r3, 23
1185; P8BE-NEXT:    sub r4, r7, r4
1186; P8BE-NEXT:    mtfprd f0, r4
1187; P8BE-NEXT:    sub r4, r8, r5
1188; P8BE-NEXT:    sub r3, r6, r3
1189; P8BE-NEXT:    mtfprd f1, r4
1190; P8BE-NEXT:    li r4, 0
1191; P8BE-NEXT:    mtfprd f2, r3
1192; P8BE-NEXT:    mtfprd f3, r4
1193; P8BE-NEXT:    xxmrghd v3, vs2, vs0
1194; P8BE-NEXT:    xxmrghd v2, vs3, vs1
1195; P8BE-NEXT:    blr
1196  %1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
1197  ret <4 x i64> %1
1198}
1199