1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
3; RUN:		-mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
4; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
5; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
6; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
7; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
8; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
9; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
10
11define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
12; P9LE-LABEL: fold_srem_vec_1:
13; P9LE:       # %bb.0:
14; P9LE-NEXT:    li r3, 0
15; P9LE-NEXT:    lis r4, -21386
16; P9LE-NEXT:    vextuhrx r3, r3, v2
17; P9LE-NEXT:    ori r4, r4, 37253
18; P9LE-NEXT:    extsh r3, r3
19; P9LE-NEXT:    mulhw r4, r3, r4
20; P9LE-NEXT:    add r4, r4, r3
21; P9LE-NEXT:    srwi r5, r4, 31
22; P9LE-NEXT:    srawi r4, r4, 6
23; P9LE-NEXT:    add r4, r4, r5
24; P9LE-NEXT:    mulli r4, r4, 95
25; P9LE-NEXT:    sub r3, r3, r4
26; P9LE-NEXT:    lis r4, 31710
27; P9LE-NEXT:    mtvsrd v3, r3
28; P9LE-NEXT:    li r3, 2
29; P9LE-NEXT:    ori r4, r4, 63421
30; P9LE-NEXT:    vextuhrx r3, r3, v2
31; P9LE-NEXT:    extsh r3, r3
32; P9LE-NEXT:    mulhw r4, r3, r4
33; P9LE-NEXT:    sub r4, r4, r3
34; P9LE-NEXT:    srwi r5, r4, 31
35; P9LE-NEXT:    srawi r4, r4, 6
36; P9LE-NEXT:    add r4, r4, r5
37; P9LE-NEXT:    mulli r4, r4, -124
38; P9LE-NEXT:    sub r3, r3, r4
39; P9LE-NEXT:    lis r4, 21399
40; P9LE-NEXT:    mtvsrd v4, r3
41; P9LE-NEXT:    li r3, 4
42; P9LE-NEXT:    ori r4, r4, 33437
43; P9LE-NEXT:    vextuhrx r3, r3, v2
44; P9LE-NEXT:    vmrghh v3, v4, v3
45; P9LE-NEXT:    extsh r3, r3
46; P9LE-NEXT:    mulhw r4, r3, r4
47; P9LE-NEXT:    srwi r5, r4, 31
48; P9LE-NEXT:    srawi r4, r4, 5
49; P9LE-NEXT:    add r4, r4, r5
50; P9LE-NEXT:    mulli r4, r4, 98
51; P9LE-NEXT:    sub r3, r3, r4
52; P9LE-NEXT:    lis r4, -16728
53; P9LE-NEXT:    mtvsrd v4, r3
54; P9LE-NEXT:    li r3, 6
55; P9LE-NEXT:    ori r4, r4, 63249
56; P9LE-NEXT:    vextuhrx r3, r3, v2
57; P9LE-NEXT:    extsh r3, r3
58; P9LE-NEXT:    mulhw r4, r3, r4
59; P9LE-NEXT:    srwi r5, r4, 31
60; P9LE-NEXT:    srawi r4, r4, 8
61; P9LE-NEXT:    add r4, r4, r5
62; P9LE-NEXT:    mulli r4, r4, -1003
63; P9LE-NEXT:    sub r3, r3, r4
64; P9LE-NEXT:    mtvsrd v2, r3
65; P9LE-NEXT:    vmrghh v2, v2, v4
66; P9LE-NEXT:    vmrglw v2, v2, v3
67; P9LE-NEXT:    blr
68;
69; P9BE-LABEL: fold_srem_vec_1:
70; P9BE:       # %bb.0:
71; P9BE-NEXT:    li r3, 2
72; P9BE-NEXT:    lis r4, 31710
73; P9BE-NEXT:    vextuhlx r3, r3, v2
74; P9BE-NEXT:    ori r4, r4, 63421
75; P9BE-NEXT:    extsh r3, r3
76; P9BE-NEXT:    mulhw r4, r3, r4
77; P9BE-NEXT:    sub r4, r4, r3
78; P9BE-NEXT:    srwi r5, r4, 31
79; P9BE-NEXT:    srawi r4, r4, 6
80; P9BE-NEXT:    add r4, r4, r5
81; P9BE-NEXT:    mulli r4, r4, -124
82; P9BE-NEXT:    sub r3, r3, r4
83; P9BE-NEXT:    lis r4, -21386
84; P9BE-NEXT:    mtvsrwz v3, r3
85; P9BE-NEXT:    li r3, 0
86; P9BE-NEXT:    ori r4, r4, 37253
87; P9BE-NEXT:    vextuhlx r3, r3, v2
88; P9BE-NEXT:    extsh r3, r3
89; P9BE-NEXT:    mulhw r4, r3, r4
90; P9BE-NEXT:    add r4, r4, r3
91; P9BE-NEXT:    srwi r5, r4, 31
92; P9BE-NEXT:    srawi r4, r4, 6
93; P9BE-NEXT:    add r4, r4, r5
94; P9BE-NEXT:    mulli r4, r4, 95
95; P9BE-NEXT:    sub r3, r3, r4
96; P9BE-NEXT:    lis r4, -16728
97; P9BE-NEXT:    mtvsrwz v4, r3
98; P9BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
99; P9BE-NEXT:    ori r4, r4, 63249
100; P9BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
101; P9BE-NEXT:    lxv v5, 0(r3)
102; P9BE-NEXT:    li r3, 6
103; P9BE-NEXT:    vextuhlx r3, r3, v2
104; P9BE-NEXT:    extsh r3, r3
105; P9BE-NEXT:    vperm v3, v4, v3, v5
106; P9BE-NEXT:    mulhw r4, r3, r4
107; P9BE-NEXT:    srwi r5, r4, 31
108; P9BE-NEXT:    srawi r4, r4, 8
109; P9BE-NEXT:    add r4, r4, r5
110; P9BE-NEXT:    mulli r4, r4, -1003
111; P9BE-NEXT:    sub r3, r3, r4
112; P9BE-NEXT:    lis r4, 21399
113; P9BE-NEXT:    mtvsrwz v4, r3
114; P9BE-NEXT:    li r3, 4
115; P9BE-NEXT:    ori r4, r4, 33437
116; P9BE-NEXT:    vextuhlx r3, r3, v2
117; P9BE-NEXT:    extsh r3, r3
118; P9BE-NEXT:    mulhw r4, r3, r4
119; P9BE-NEXT:    srwi r5, r4, 31
120; P9BE-NEXT:    srawi r4, r4, 5
121; P9BE-NEXT:    add r4, r4, r5
122; P9BE-NEXT:    mulli r4, r4, 98
123; P9BE-NEXT:    sub r3, r3, r4
124; P9BE-NEXT:    mtvsrwz v2, r3
125; P9BE-NEXT:    vperm v2, v2, v4, v5
126; P9BE-NEXT:    vmrghw v2, v3, v2
127; P9BE-NEXT:    blr
128;
129; P8LE-LABEL: fold_srem_vec_1:
130; P8LE:       # %bb.0:
131; P8LE-NEXT:    xxswapd vs0, v2
132; P8LE-NEXT:    lis r3, 21399
133; P8LE-NEXT:    lis r8, -16728
134; P8LE-NEXT:    lis r9, -21386
135; P8LE-NEXT:    lis r10, 31710
136; P8LE-NEXT:    ori r3, r3, 33437
137; P8LE-NEXT:    ori r8, r8, 63249
138; P8LE-NEXT:    ori r9, r9, 37253
139; P8LE-NEXT:    ori r10, r10, 63421
140; P8LE-NEXT:    mffprd r4, f0
141; P8LE-NEXT:    rldicl r5, r4, 32, 48
142; P8LE-NEXT:    rldicl r6, r4, 16, 48
143; P8LE-NEXT:    clrldi r7, r4, 48
144; P8LE-NEXT:    extsh r5, r5
145; P8LE-NEXT:    extsh r6, r6
146; P8LE-NEXT:    rldicl r4, r4, 48, 48
147; P8LE-NEXT:    extsh r7, r7
148; P8LE-NEXT:    mulhw r3, r5, r3
149; P8LE-NEXT:    extsh r4, r4
150; P8LE-NEXT:    mulhw r8, r6, r8
151; P8LE-NEXT:    mulhw r9, r7, r9
152; P8LE-NEXT:    mulhw r10, r4, r10
153; P8LE-NEXT:    srwi r11, r3, 31
154; P8LE-NEXT:    srawi r3, r3, 5
155; P8LE-NEXT:    add r3, r3, r11
156; P8LE-NEXT:    srwi r11, r8, 31
157; P8LE-NEXT:    add r9, r9, r7
158; P8LE-NEXT:    srawi r8, r8, 8
159; P8LE-NEXT:    sub r10, r10, r4
160; P8LE-NEXT:    add r8, r8, r11
161; P8LE-NEXT:    srwi r11, r9, 31
162; P8LE-NEXT:    srawi r9, r9, 6
163; P8LE-NEXT:    mulli r3, r3, 98
164; P8LE-NEXT:    add r9, r9, r11
165; P8LE-NEXT:    srwi r11, r10, 31
166; P8LE-NEXT:    srawi r10, r10, 6
167; P8LE-NEXT:    mulli r8, r8, -1003
168; P8LE-NEXT:    add r10, r10, r11
169; P8LE-NEXT:    mulli r9, r9, 95
170; P8LE-NEXT:    mulli r10, r10, -124
171; P8LE-NEXT:    sub r3, r5, r3
172; P8LE-NEXT:    mtvsrd v2, r3
173; P8LE-NEXT:    sub r5, r6, r8
174; P8LE-NEXT:    sub r3, r7, r9
175; P8LE-NEXT:    mtvsrd v3, r5
176; P8LE-NEXT:    sub r4, r4, r10
177; P8LE-NEXT:    mtvsrd v4, r3
178; P8LE-NEXT:    mtvsrd v5, r4
179; P8LE-NEXT:    vmrghh v2, v3, v2
180; P8LE-NEXT:    vmrghh v3, v5, v4
181; P8LE-NEXT:    vmrglw v2, v2, v3
182; P8LE-NEXT:    blr
183;
184; P8BE-LABEL: fold_srem_vec_1:
185; P8BE:       # %bb.0:
186; P8BE-NEXT:    mfvsrd r4, v2
187; P8BE-NEXT:    lis r3, -16728
188; P8BE-NEXT:    lis r8, 21399
189; P8BE-NEXT:    lis r9, 31710
190; P8BE-NEXT:    lis r10, -21386
191; P8BE-NEXT:    ori r3, r3, 63249
192; P8BE-NEXT:    ori r8, r8, 33437
193; P8BE-NEXT:    ori r9, r9, 63421
194; P8BE-NEXT:    ori r10, r10, 37253
195; P8BE-NEXT:    clrldi r5, r4, 48
196; P8BE-NEXT:    rldicl r6, r4, 48, 48
197; P8BE-NEXT:    rldicl r7, r4, 32, 48
198; P8BE-NEXT:    extsh r5, r5
199; P8BE-NEXT:    extsh r6, r6
200; P8BE-NEXT:    rldicl r4, r4, 16, 48
201; P8BE-NEXT:    extsh r7, r7
202; P8BE-NEXT:    mulhw r3, r5, r3
203; P8BE-NEXT:    extsh r4, r4
204; P8BE-NEXT:    mulhw r8, r6, r8
205; P8BE-NEXT:    mulhw r9, r7, r9
206; P8BE-NEXT:    mulhw r10, r4, r10
207; P8BE-NEXT:    srwi r11, r3, 31
208; P8BE-NEXT:    srawi r3, r3, 8
209; P8BE-NEXT:    add r3, r3, r11
210; P8BE-NEXT:    srwi r11, r8, 31
211; P8BE-NEXT:    sub r9, r9, r7
212; P8BE-NEXT:    srawi r8, r8, 5
213; P8BE-NEXT:    add r10, r10, r4
214; P8BE-NEXT:    add r8, r8, r11
215; P8BE-NEXT:    srwi r11, r9, 31
216; P8BE-NEXT:    srawi r9, r9, 6
217; P8BE-NEXT:    mulli r3, r3, -1003
218; P8BE-NEXT:    add r9, r9, r11
219; P8BE-NEXT:    srwi r11, r10, 31
220; P8BE-NEXT:    srawi r10, r10, 6
221; P8BE-NEXT:    mulli r8, r8, 98
222; P8BE-NEXT:    add r10, r10, r11
223; P8BE-NEXT:    mulli r9, r9, -124
224; P8BE-NEXT:    mulli r10, r10, 95
225; P8BE-NEXT:    sub r3, r5, r3
226; P8BE-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
227; P8BE-NEXT:    mtvsrwz v2, r3
228; P8BE-NEXT:    addi r3, r5, .LCPI0_0@toc@l
229; P8BE-NEXT:    sub r6, r6, r8
230; P8BE-NEXT:    lxvw4x v3, 0, r3
231; P8BE-NEXT:    sub r3, r7, r9
232; P8BE-NEXT:    mtvsrwz v4, r6
233; P8BE-NEXT:    sub r4, r4, r10
234; P8BE-NEXT:    mtvsrwz v5, r3
235; P8BE-NEXT:    mtvsrwz v0, r4
236; P8BE-NEXT:    vperm v2, v4, v2, v3
237; P8BE-NEXT:    vperm v3, v0, v5, v3
238; P8BE-NEXT:    vmrghw v2, v3, v2
239; P8BE-NEXT:    blr
; IR under test: signed remainder of each i16 lane by a different constant that
; is not a power of two (95, -124, 98, -1003).
; The expected assembly above contains no divide instruction. For every lane the
; halfword is extracted and sign-extended (extsh), multiplied high (mulhw) by a
; per-divisor constant, adjusted with add/sub and shifts, multiplied back by the
; divisor (mulli) and subtracted from the input (sub) to form the remainder.
; The four scalar results are then merged back into one vector register.
240  %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
241  ret <4 x i16> %1
242}
243
244define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
245; P9LE-LABEL: fold_srem_vec_2:
246; P9LE:       # %bb.0:
247; P9LE-NEXT:    li r3, 0
248; P9LE-NEXT:    lis r4, -21386
249; P9LE-NEXT:    vextuhrx r3, r3, v2
250; P9LE-NEXT:    ori r4, r4, 37253
251; P9LE-NEXT:    extsh r3, r3
252; P9LE-NEXT:    mulhw r5, r3, r4
253; P9LE-NEXT:    add r5, r5, r3
254; P9LE-NEXT:    srwi r6, r5, 31
255; P9LE-NEXT:    srawi r5, r5, 6
256; P9LE-NEXT:    add r5, r5, r6
257; P9LE-NEXT:    mulli r5, r5, 95
258; P9LE-NEXT:    sub r3, r3, r5
259; P9LE-NEXT:    mtvsrd v3, r3
260; P9LE-NEXT:    li r3, 2
261; P9LE-NEXT:    vextuhrx r3, r3, v2
262; P9LE-NEXT:    extsh r3, r3
263; P9LE-NEXT:    mulhw r5, r3, r4
264; P9LE-NEXT:    add r5, r5, r3
265; P9LE-NEXT:    srwi r6, r5, 31
266; P9LE-NEXT:    srawi r5, r5, 6
267; P9LE-NEXT:    add r5, r5, r6
268; P9LE-NEXT:    mulli r5, r5, 95
269; P9LE-NEXT:    sub r3, r3, r5
270; P9LE-NEXT:    mtvsrd v4, r3
271; P9LE-NEXT:    li r3, 4
272; P9LE-NEXT:    vextuhrx r3, r3, v2
273; P9LE-NEXT:    vmrghh v3, v4, v3
274; P9LE-NEXT:    extsh r3, r3
275; P9LE-NEXT:    mulhw r5, r3, r4
276; P9LE-NEXT:    add r5, r5, r3
277; P9LE-NEXT:    srwi r6, r5, 31
278; P9LE-NEXT:    srawi r5, r5, 6
279; P9LE-NEXT:    add r5, r5, r6
280; P9LE-NEXT:    mulli r5, r5, 95
281; P9LE-NEXT:    sub r3, r3, r5
282; P9LE-NEXT:    mtvsrd v4, r3
283; P9LE-NEXT:    li r3, 6
284; P9LE-NEXT:    vextuhrx r3, r3, v2
285; P9LE-NEXT:    extsh r3, r3
286; P9LE-NEXT:    mulhw r4, r3, r4
287; P9LE-NEXT:    add r4, r4, r3
288; P9LE-NEXT:    srwi r5, r4, 31
289; P9LE-NEXT:    srawi r4, r4, 6
290; P9LE-NEXT:    add r4, r4, r5
291; P9LE-NEXT:    mulli r4, r4, 95
292; P9LE-NEXT:    sub r3, r3, r4
293; P9LE-NEXT:    mtvsrd v2, r3
294; P9LE-NEXT:    vmrghh v2, v2, v4
295; P9LE-NEXT:    vmrglw v2, v2, v3
296; P9LE-NEXT:    blr
297;
298; P9BE-LABEL: fold_srem_vec_2:
299; P9BE:       # %bb.0:
300; P9BE-NEXT:    li r3, 6
301; P9BE-NEXT:    lis r4, -21386
302; P9BE-NEXT:    vextuhlx r3, r3, v2
303; P9BE-NEXT:    ori r4, r4, 37253
304; P9BE-NEXT:    extsh r3, r3
305; P9BE-NEXT:    mulhw r5, r3, r4
306; P9BE-NEXT:    add r5, r5, r3
307; P9BE-NEXT:    srwi r6, r5, 31
308; P9BE-NEXT:    srawi r5, r5, 6
309; P9BE-NEXT:    add r5, r5, r6
310; P9BE-NEXT:    mulli r5, r5, 95
311; P9BE-NEXT:    sub r3, r3, r5
312; P9BE-NEXT:    mtvsrwz v3, r3
313; P9BE-NEXT:    li r3, 4
314; P9BE-NEXT:    vextuhlx r3, r3, v2
315; P9BE-NEXT:    extsh r3, r3
316; P9BE-NEXT:    mulhw r5, r3, r4
317; P9BE-NEXT:    add r5, r5, r3
318; P9BE-NEXT:    srwi r6, r5, 31
319; P9BE-NEXT:    srawi r5, r5, 6
320; P9BE-NEXT:    add r5, r5, r6
321; P9BE-NEXT:    mulli r5, r5, 95
322; P9BE-NEXT:    sub r3, r3, r5
323; P9BE-NEXT:    mtvsrwz v4, r3
324; P9BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
325; P9BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
326; P9BE-NEXT:    lxv v5, 0(r3)
327; P9BE-NEXT:    li r3, 2
328; P9BE-NEXT:    vextuhlx r3, r3, v2
329; P9BE-NEXT:    extsh r3, r3
330; P9BE-NEXT:    vperm v3, v4, v3, v5
331; P9BE-NEXT:    mulhw r5, r3, r4
332; P9BE-NEXT:    add r5, r5, r3
333; P9BE-NEXT:    srwi r6, r5, 31
334; P9BE-NEXT:    srawi r5, r5, 6
335; P9BE-NEXT:    add r5, r5, r6
336; P9BE-NEXT:    mulli r5, r5, 95
337; P9BE-NEXT:    sub r3, r3, r5
338; P9BE-NEXT:    mtvsrwz v4, r3
339; P9BE-NEXT:    li r3, 0
340; P9BE-NEXT:    vextuhlx r3, r3, v2
341; P9BE-NEXT:    extsh r3, r3
342; P9BE-NEXT:    mulhw r4, r3, r4
343; P9BE-NEXT:    add r4, r4, r3
344; P9BE-NEXT:    srwi r5, r4, 31
345; P9BE-NEXT:    srawi r4, r4, 6
346; P9BE-NEXT:    add r4, r4, r5
347; P9BE-NEXT:    mulli r4, r4, 95
348; P9BE-NEXT:    sub r3, r3, r4
349; P9BE-NEXT:    mtvsrwz v2, r3
350; P9BE-NEXT:    vperm v2, v2, v4, v5
351; P9BE-NEXT:    vmrghw v2, v2, v3
352; P9BE-NEXT:    blr
353;
354; P8LE-LABEL: fold_srem_vec_2:
355; P8LE:       # %bb.0:
356; P8LE-NEXT:    xxswapd vs0, v2
357; P8LE-NEXT:    lis r3, -21386
358; P8LE-NEXT:    ori r3, r3, 37253
359; P8LE-NEXT:    mffprd r4, f0
360; P8LE-NEXT:    clrldi r5, r4, 48
361; P8LE-NEXT:    rldicl r6, r4, 48, 48
362; P8LE-NEXT:    extsh r5, r5
363; P8LE-NEXT:    rldicl r7, r4, 32, 48
364; P8LE-NEXT:    extsh r6, r6
365; P8LE-NEXT:    mulhw r8, r5, r3
366; P8LE-NEXT:    rldicl r4, r4, 16, 48
367; P8LE-NEXT:    extsh r7, r7
368; P8LE-NEXT:    mulhw r9, r6, r3
369; P8LE-NEXT:    extsh r4, r4
370; P8LE-NEXT:    mulhw r10, r7, r3
371; P8LE-NEXT:    mulhw r3, r4, r3
372; P8LE-NEXT:    add r8, r8, r5
373; P8LE-NEXT:    add r9, r9, r6
374; P8LE-NEXT:    srwi r11, r8, 31
375; P8LE-NEXT:    srawi r8, r8, 6
376; P8LE-NEXT:    add r10, r10, r7
377; P8LE-NEXT:    add r3, r3, r4
378; P8LE-NEXT:    add r8, r8, r11
379; P8LE-NEXT:    srwi r11, r9, 31
380; P8LE-NEXT:    srawi r9, r9, 6
381; P8LE-NEXT:    mulli r8, r8, 95
382; P8LE-NEXT:    add r9, r9, r11
383; P8LE-NEXT:    srwi r11, r10, 31
384; P8LE-NEXT:    srawi r10, r10, 6
385; P8LE-NEXT:    mulli r9, r9, 95
386; P8LE-NEXT:    add r10, r10, r11
387; P8LE-NEXT:    srwi r11, r3, 31
388; P8LE-NEXT:    srawi r3, r3, 6
389; P8LE-NEXT:    mulli r10, r10, 95
390; P8LE-NEXT:    sub r5, r5, r8
391; P8LE-NEXT:    add r3, r3, r11
392; P8LE-NEXT:    mtvsrd v2, r5
393; P8LE-NEXT:    mulli r3, r3, 95
394; P8LE-NEXT:    sub r6, r6, r9
395; P8LE-NEXT:    mtvsrd v3, r6
396; P8LE-NEXT:    sub r5, r7, r10
397; P8LE-NEXT:    mtvsrd v4, r5
398; P8LE-NEXT:    sub r3, r4, r3
399; P8LE-NEXT:    vmrghh v2, v3, v2
400; P8LE-NEXT:    mtvsrd v5, r3
401; P8LE-NEXT:    vmrghh v3, v5, v4
402; P8LE-NEXT:    vmrglw v2, v3, v2
403; P8LE-NEXT:    blr
404;
405; P8BE-LABEL: fold_srem_vec_2:
406; P8BE:       # %bb.0:
407; P8BE-NEXT:    mfvsrd r4, v2
408; P8BE-NEXT:    lis r3, -21386
409; P8BE-NEXT:    ori r3, r3, 37253
410; P8BE-NEXT:    clrldi r5, r4, 48
411; P8BE-NEXT:    rldicl r6, r4, 48, 48
412; P8BE-NEXT:    extsh r5, r5
413; P8BE-NEXT:    rldicl r7, r4, 32, 48
414; P8BE-NEXT:    extsh r6, r6
415; P8BE-NEXT:    mulhw r8, r5, r3
416; P8BE-NEXT:    rldicl r4, r4, 16, 48
417; P8BE-NEXT:    extsh r7, r7
418; P8BE-NEXT:    mulhw r9, r6, r3
419; P8BE-NEXT:    extsh r4, r4
420; P8BE-NEXT:    mulhw r10, r7, r3
421; P8BE-NEXT:    mulhw r3, r4, r3
422; P8BE-NEXT:    add r8, r8, r5
423; P8BE-NEXT:    add r9, r9, r6
424; P8BE-NEXT:    srwi r11, r8, 31
425; P8BE-NEXT:    srawi r8, r8, 6
426; P8BE-NEXT:    add r10, r10, r7
427; P8BE-NEXT:    add r3, r3, r4
428; P8BE-NEXT:    add r8, r8, r11
429; P8BE-NEXT:    srwi r11, r9, 31
430; P8BE-NEXT:    srawi r9, r9, 6
431; P8BE-NEXT:    mulli r8, r8, 95
432; P8BE-NEXT:    add r9, r9, r11
433; P8BE-NEXT:    srwi r11, r10, 31
434; P8BE-NEXT:    srawi r10, r10, 6
435; P8BE-NEXT:    mulli r9, r9, 95
436; P8BE-NEXT:    add r10, r10, r11
437; P8BE-NEXT:    srwi r11, r3, 31
438; P8BE-NEXT:    srawi r3, r3, 6
439; P8BE-NEXT:    mulli r10, r10, 95
440; P8BE-NEXT:    sub r5, r5, r8
441; P8BE-NEXT:    addis r8, r2, .LCPI1_0@toc@ha
442; P8BE-NEXT:    add r3, r3, r11
443; P8BE-NEXT:    mtvsrwz v2, r5
444; P8BE-NEXT:    addi r5, r8, .LCPI1_0@toc@l
445; P8BE-NEXT:    mulli r3, r3, 95
446; P8BE-NEXT:    sub r6, r6, r9
447; P8BE-NEXT:    lxvw4x v3, 0, r5
448; P8BE-NEXT:    mtvsrwz v4, r6
449; P8BE-NEXT:    sub r5, r7, r10
450; P8BE-NEXT:    mtvsrwz v5, r5
451; P8BE-NEXT:    sub r3, r4, r3
452; P8BE-NEXT:    vperm v2, v4, v2, v3
453; P8BE-NEXT:    mtvsrwz v0, r3
454; P8BE-NEXT:    vperm v3, v0, v5, v3
455; P8BE-NEXT:    vmrghw v2, v3, v2
456; P8BE-NEXT:    blr
; IR under test: signed remainder with the same divisor (95) in all four lanes.
; In the expected assembly above the multiplier constant (lis -21386 followed by
; ori 37253) is materialized once per function and reused by every lane's mulhw.
; Each lane still finishes with mulli by 95 and a sub from the input to produce
; the remainder, and no divide instruction appears.
457  %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
458  ret <4 x i16> %1
459}
460
461
462; Don't fold if we can combine srem with sdiv.
463define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
464; P9LE-LABEL: combine_srem_sdiv:
465; P9LE:       # %bb.0:
466; P9LE-NEXT:    li r3, 0
467; P9LE-NEXT:    lis r4, -21386
468; P9LE-NEXT:    vextuhrx r3, r3, v2
469; P9LE-NEXT:    ori r4, r4, 37253
470; P9LE-NEXT:    extsh r3, r3
471; P9LE-NEXT:    mulhw r5, r3, r4
472; P9LE-NEXT:    add r5, r5, r3
473; P9LE-NEXT:    srwi r6, r5, 31
474; P9LE-NEXT:    srawi r5, r5, 6
475; P9LE-NEXT:    add r5, r5, r6
476; P9LE-NEXT:    mulli r6, r5, 95
477; P9LE-NEXT:    sub r3, r3, r6
478; P9LE-NEXT:    mtvsrd v3, r3
479; P9LE-NEXT:    li r3, 2
480; P9LE-NEXT:    vextuhrx r3, r3, v2
481; P9LE-NEXT:    extsh r6, r3
482; P9LE-NEXT:    mulhw r7, r6, r4
483; P9LE-NEXT:    add r6, r7, r6
484; P9LE-NEXT:    srwi r7, r6, 31
485; P9LE-NEXT:    srawi r6, r6, 6
486; P9LE-NEXT:    add r6, r6, r7
487; P9LE-NEXT:    mulli r7, r6, 95
488; P9LE-NEXT:    sub r3, r3, r7
489; P9LE-NEXT:    mtvsrd v4, r3
490; P9LE-NEXT:    li r3, 4
491; P9LE-NEXT:    vextuhrx r3, r3, v2
492; P9LE-NEXT:    vmrghh v3, v4, v3
493; P9LE-NEXT:    extsh r7, r3
494; P9LE-NEXT:    mulhw r8, r7, r4
495; P9LE-NEXT:    add r7, r8, r7
496; P9LE-NEXT:    srwi r8, r7, 31
497; P9LE-NEXT:    srawi r7, r7, 6
498; P9LE-NEXT:    add r7, r7, r8
499; P9LE-NEXT:    mulli r8, r7, 95
500; P9LE-NEXT:    sub r3, r3, r8
501; P9LE-NEXT:    mtvsrd v4, r3
502; P9LE-NEXT:    li r3, 6
503; P9LE-NEXT:    vextuhrx r3, r3, v2
504; P9LE-NEXT:    extsh r8, r3
505; P9LE-NEXT:    mulhw r4, r8, r4
506; P9LE-NEXT:    add r4, r4, r8
507; P9LE-NEXT:    srwi r8, r4, 31
508; P9LE-NEXT:    srawi r4, r4, 6
509; P9LE-NEXT:    add r4, r4, r8
510; P9LE-NEXT:    mulli r8, r4, 95
511; P9LE-NEXT:    mtvsrd v5, r4
512; P9LE-NEXT:    sub r3, r3, r8
513; P9LE-NEXT:    mtvsrd v2, r3
514; P9LE-NEXT:    vmrghh v2, v2, v4
515; P9LE-NEXT:    mtvsrd v4, r6
516; P9LE-NEXT:    vmrglw v2, v2, v3
517; P9LE-NEXT:    mtvsrd v3, r5
518; P9LE-NEXT:    vmrghh v3, v4, v3
519; P9LE-NEXT:    mtvsrd v4, r7
520; P9LE-NEXT:    vmrghh v4, v5, v4
521; P9LE-NEXT:    vmrglw v3, v4, v3
522; P9LE-NEXT:    vadduhm v2, v2, v3
523; P9LE-NEXT:    blr
524;
525; P9BE-LABEL: combine_srem_sdiv:
526; P9BE:       # %bb.0:
527; P9BE-NEXT:    li r3, 6
528; P9BE-NEXT:    lis r5, -21386
529; P9BE-NEXT:    vextuhlx r3, r3, v2
530; P9BE-NEXT:    ori r5, r5, 37253
531; P9BE-NEXT:    extsh r4, r3
532; P9BE-NEXT:    mulhw r6, r4, r5
533; P9BE-NEXT:    add r4, r6, r4
534; P9BE-NEXT:    srwi r6, r4, 31
535; P9BE-NEXT:    srawi r4, r4, 6
536; P9BE-NEXT:    add r4, r4, r6
537; P9BE-NEXT:    mulli r6, r4, 95
538; P9BE-NEXT:    sub r3, r3, r6
539; P9BE-NEXT:    mtvsrwz v3, r3
540; P9BE-NEXT:    li r3, 4
541; P9BE-NEXT:    vextuhlx r3, r3, v2
542; P9BE-NEXT:    extsh r6, r3
543; P9BE-NEXT:    mulhw r7, r6, r5
544; P9BE-NEXT:    add r6, r7, r6
545; P9BE-NEXT:    srwi r7, r6, 31
546; P9BE-NEXT:    srawi r6, r6, 6
547; P9BE-NEXT:    add r6, r6, r7
548; P9BE-NEXT:    mulli r7, r6, 95
549; P9BE-NEXT:    sub r3, r3, r7
550; P9BE-NEXT:    mtvsrwz v4, r3
551; P9BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
552; P9BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
553; P9BE-NEXT:    lxv v5, 0(r3)
554; P9BE-NEXT:    li r3, 2
555; P9BE-NEXT:    vextuhlx r3, r3, v2
556; P9BE-NEXT:    extsh r7, r3
557; P9BE-NEXT:    vperm v3, v4, v3, v5
558; P9BE-NEXT:    mulhw r8, r7, r5
559; P9BE-NEXT:    add r7, r8, r7
560; P9BE-NEXT:    srwi r8, r7, 31
561; P9BE-NEXT:    srawi r7, r7, 6
562; P9BE-NEXT:    add r7, r7, r8
563; P9BE-NEXT:    mulli r8, r7, 95
564; P9BE-NEXT:    sub r3, r3, r8
565; P9BE-NEXT:    mtvsrwz v4, r3
566; P9BE-NEXT:    li r3, 0
567; P9BE-NEXT:    vextuhlx r3, r3, v2
568; P9BE-NEXT:    extsh r3, r3
569; P9BE-NEXT:    mulhw r5, r3, r5
570; P9BE-NEXT:    add r5, r5, r3
571; P9BE-NEXT:    srwi r8, r5, 31
572; P9BE-NEXT:    srawi r5, r5, 6
573; P9BE-NEXT:    add r5, r5, r8
574; P9BE-NEXT:    mulli r8, r5, 95
575; P9BE-NEXT:    mtvsrwz v0, r5
576; P9BE-NEXT:    sub r3, r3, r8
577; P9BE-NEXT:    mtvsrwz v2, r3
578; P9BE-NEXT:    vperm v2, v2, v4, v5
579; P9BE-NEXT:    mtvsrwz v4, r6
580; P9BE-NEXT:    vmrghw v2, v2, v3
581; P9BE-NEXT:    mtvsrwz v3, r4
582; P9BE-NEXT:    vperm v3, v4, v3, v5
583; P9BE-NEXT:    mtvsrwz v4, r7
584; P9BE-NEXT:    vperm v4, v0, v4, v5
585; P9BE-NEXT:    vmrghw v3, v4, v3
586; P9BE-NEXT:    vadduhm v2, v2, v3
587; P9BE-NEXT:    blr
588;
589; P8LE-LABEL: combine_srem_sdiv:
590; P8LE:       # %bb.0:
591; P8LE-NEXT:    xxswapd vs0, v2
592; P8LE-NEXT:    lis r3, -21386
593; P8LE-NEXT:    ori r3, r3, 37253
594; P8LE-NEXT:    mffprd r4, f0
595; P8LE-NEXT:    clrldi r5, r4, 48
596; P8LE-NEXT:    rldicl r6, r4, 48, 48
597; P8LE-NEXT:    rldicl r7, r4, 32, 48
598; P8LE-NEXT:    extsh r5, r5
599; P8LE-NEXT:    extsh r8, r6
600; P8LE-NEXT:    extsh r9, r7
601; P8LE-NEXT:    mulhw r10, r5, r3
602; P8LE-NEXT:    mulhw r11, r8, r3
603; P8LE-NEXT:    rldicl r4, r4, 16, 48
604; P8LE-NEXT:    mulhw r12, r9, r3
605; P8LE-NEXT:    extsh r0, r4
606; P8LE-NEXT:    mulhw r3, r0, r3
607; P8LE-NEXT:    add r10, r10, r5
608; P8LE-NEXT:    add r8, r11, r8
609; P8LE-NEXT:    srwi r11, r10, 31
610; P8LE-NEXT:    add r9, r12, r9
611; P8LE-NEXT:    srawi r10, r10, 6
612; P8LE-NEXT:    srawi r12, r8, 6
613; P8LE-NEXT:    srwi r8, r8, 31
614; P8LE-NEXT:    add r10, r10, r11
615; P8LE-NEXT:    add r3, r3, r0
616; P8LE-NEXT:    srawi r11, r9, 6
617; P8LE-NEXT:    srwi r9, r9, 31
618; P8LE-NEXT:    add r8, r12, r8
619; P8LE-NEXT:    mtvsrd v2, r10
620; P8LE-NEXT:    mulli r12, r10, 95
621; P8LE-NEXT:    add r9, r11, r9
622; P8LE-NEXT:    srwi r11, r3, 31
623; P8LE-NEXT:    mtvsrd v3, r8
624; P8LE-NEXT:    srawi r3, r3, 6
625; P8LE-NEXT:    mulli r10, r8, 95
626; P8LE-NEXT:    mtvsrd v4, r9
627; P8LE-NEXT:    add r3, r3, r11
628; P8LE-NEXT:    mulli r8, r9, 95
629; P8LE-NEXT:    vmrghh v2, v3, v2
630; P8LE-NEXT:    mulli r9, r3, 95
631; P8LE-NEXT:    sub r5, r5, r12
632; P8LE-NEXT:    sub r6, r6, r10
633; P8LE-NEXT:    mtvsrd v3, r5
634; P8LE-NEXT:    mtvsrd v5, r6
635; P8LE-NEXT:    sub r5, r7, r8
636; P8LE-NEXT:    sub r4, r4, r9
637; P8LE-NEXT:    mtvsrd v0, r5
638; P8LE-NEXT:    mtvsrd v1, r4
639; P8LE-NEXT:    vmrghh v3, v5, v3
640; P8LE-NEXT:    mtvsrd v5, r3
641; P8LE-NEXT:    vmrghh v0, v1, v0
642; P8LE-NEXT:    vmrghh v4, v5, v4
643; P8LE-NEXT:    vmrglw v3, v0, v3
644; P8LE-NEXT:    vmrglw v2, v4, v2
645; P8LE-NEXT:    vadduhm v2, v3, v2
646; P8LE-NEXT:    blr
647;
648; P8BE-LABEL: combine_srem_sdiv:
649; P8BE:       # %bb.0:
650; P8BE-NEXT:    mfvsrd r4, v2
651; P8BE-NEXT:    lis r3, -21386
652; P8BE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
653; P8BE-NEXT:    addis r30, r2, .LCPI2_0@toc@ha
654; P8BE-NEXT:    ori r3, r3, 37253
655; P8BE-NEXT:    clrldi r5, r4, 48
656; P8BE-NEXT:    rldicl r6, r4, 48, 48
657; P8BE-NEXT:    rldicl r7, r4, 32, 48
658; P8BE-NEXT:    extsh r8, r5
659; P8BE-NEXT:    extsh r9, r6
660; P8BE-NEXT:    extsh r10, r7
661; P8BE-NEXT:    mulhw r11, r8, r3
662; P8BE-NEXT:    mulhw r12, r9, r3
663; P8BE-NEXT:    rldicl r4, r4, 16, 48
664; P8BE-NEXT:    mulhw r0, r10, r3
665; P8BE-NEXT:    extsh r4, r4
666; P8BE-NEXT:    mulhw r3, r4, r3
667; P8BE-NEXT:    add r8, r11, r8
668; P8BE-NEXT:    add r9, r12, r9
669; P8BE-NEXT:    srwi r11, r8, 31
670; P8BE-NEXT:    add r10, r0, r10
671; P8BE-NEXT:    srawi r8, r8, 6
672; P8BE-NEXT:    addi r0, r30, .LCPI2_0@toc@l
673; P8BE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
674; P8BE-NEXT:    srawi r12, r9, 6
675; P8BE-NEXT:    srwi r9, r9, 31
676; P8BE-NEXT:    add r8, r8, r11
677; P8BE-NEXT:    add r3, r3, r4
678; P8BE-NEXT:    lxvw4x v2, 0, r0
679; P8BE-NEXT:    srawi r11, r10, 6
680; P8BE-NEXT:    srwi r10, r10, 31
681; P8BE-NEXT:    add r9, r12, r9
682; P8BE-NEXT:    mtvsrwz v3, r8
683; P8BE-NEXT:    mulli r12, r8, 95
684; P8BE-NEXT:    add r10, r11, r10
685; P8BE-NEXT:    srwi r11, r3, 31
686; P8BE-NEXT:    mtvsrwz v4, r9
687; P8BE-NEXT:    srawi r3, r3, 6
688; P8BE-NEXT:    mulli r8, r9, 95
689; P8BE-NEXT:    mtvsrwz v5, r10
690; P8BE-NEXT:    add r3, r3, r11
691; P8BE-NEXT:    mulli r9, r10, 95
692; P8BE-NEXT:    vperm v3, v4, v3, v2
693; P8BE-NEXT:    mulli r10, r3, 95
694; P8BE-NEXT:    sub r5, r5, r12
695; P8BE-NEXT:    sub r6, r6, r8
696; P8BE-NEXT:    mtvsrwz v4, r5
697; P8BE-NEXT:    mtvsrwz v0, r6
698; P8BE-NEXT:    sub r5, r7, r9
699; P8BE-NEXT:    sub r4, r4, r10
700; P8BE-NEXT:    mtvsrwz v1, r5
701; P8BE-NEXT:    mtvsrwz v6, r4
702; P8BE-NEXT:    vperm v4, v0, v4, v2
703; P8BE-NEXT:    mtvsrwz v0, r3
704; P8BE-NEXT:    vperm v1, v6, v1, v2
705; P8BE-NEXT:    vperm v2, v0, v5, v2
706; P8BE-NEXT:    vmrghw v4, v1, v4
707; P8BE-NEXT:    vmrghw v2, v2, v3
708; P8BE-NEXT:    vadduhm v2, v4, v2
709; P8BE-NEXT:    blr
; IR under test: srem and sdiv of %x by the same splat divisor (95), with the
; two results added together.
; In the expected assembly above each lane's quotient is computed once (mulhw,
; add, shifts) and then used twice: it is multiplied by 95 (mulli) and subtracted
; from the input to give the remainder, and it is also moved into a vector
; register as the sdiv result. The remainder and quotient vectors are finally
; summed with vadduhm.
710  %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
711  %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
712  %3 = add <4 x i16> %1, %2
713  ret <4 x i16> %3
714}
715
716; Don't fold for divisors that are a power of two.
717define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
718; P9LE-LABEL: dont_fold_srem_power_of_two:
719; P9LE:       # %bb.0:
720; P9LE-NEXT:    li r3, 0
721; P9LE-NEXT:    vextuhrx r3, r3, v2
722; P9LE-NEXT:    extsh r3, r3
723; P9LE-NEXT:    srawi r4, r3, 6
724; P9LE-NEXT:    addze r4, r4
725; P9LE-NEXT:    slwi r4, r4, 6
726; P9LE-NEXT:    sub r3, r3, r4
727; P9LE-NEXT:    mtvsrd v3, r3
728; P9LE-NEXT:    li r3, 2
729; P9LE-NEXT:    vextuhrx r3, r3, v2
730; P9LE-NEXT:    extsh r3, r3
731; P9LE-NEXT:    srawi r4, r3, 5
732; P9LE-NEXT:    addze r4, r4
733; P9LE-NEXT:    slwi r4, r4, 5
734; P9LE-NEXT:    sub r3, r3, r4
735; P9LE-NEXT:    lis r4, -21386
736; P9LE-NEXT:    mtvsrd v4, r3
737; P9LE-NEXT:    li r3, 6
738; P9LE-NEXT:    ori r4, r4, 37253
739; P9LE-NEXT:    vextuhrx r3, r3, v2
740; P9LE-NEXT:    vmrghh v3, v4, v3
741; P9LE-NEXT:    extsh r3, r3
742; P9LE-NEXT:    mulhw r4, r3, r4
743; P9LE-NEXT:    add r4, r4, r3
744; P9LE-NEXT:    srwi r5, r4, 31
745; P9LE-NEXT:    srawi r4, r4, 6
746; P9LE-NEXT:    add r4, r4, r5
747; P9LE-NEXT:    mulli r4, r4, 95
748; P9LE-NEXT:    sub r3, r3, r4
749; P9LE-NEXT:    mtvsrd v4, r3
750; P9LE-NEXT:    li r3, 4
751; P9LE-NEXT:    vextuhrx r3, r3, v2
752; P9LE-NEXT:    extsh r3, r3
753; P9LE-NEXT:    srawi r4, r3, 3
754; P9LE-NEXT:    addze r4, r4
755; P9LE-NEXT:    slwi r4, r4, 3
756; P9LE-NEXT:    sub r3, r3, r4
757; P9LE-NEXT:    mtvsrd v2, r3
758; P9LE-NEXT:    vmrghh v2, v4, v2
759; P9LE-NEXT:    vmrglw v2, v2, v3
760; P9LE-NEXT:    blr
761;
762; P9BE-LABEL: dont_fold_srem_power_of_two:
763; P9BE:       # %bb.0:
764; P9BE-NEXT:    li r3, 2
765; P9BE-NEXT:    vextuhlx r3, r3, v2
766; P9BE-NEXT:    extsh r3, r3
767; P9BE-NEXT:    srawi r4, r3, 5
768; P9BE-NEXT:    addze r4, r4
769; P9BE-NEXT:    slwi r4, r4, 5
770; P9BE-NEXT:    sub r3, r3, r4
771; P9BE-NEXT:    mtvsrwz v3, r3
772; P9BE-NEXT:    li r3, 0
773; P9BE-NEXT:    vextuhlx r3, r3, v2
774; P9BE-NEXT:    extsh r3, r3
775; P9BE-NEXT:    srawi r4, r3, 6
776; P9BE-NEXT:    addze r4, r4
777; P9BE-NEXT:    slwi r4, r4, 6
778; P9BE-NEXT:    sub r3, r3, r4
779; P9BE-NEXT:    lis r4, -21386
780; P9BE-NEXT:    mtvsrwz v4, r3
781; P9BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
782; P9BE-NEXT:    ori r4, r4, 37253
783; P9BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
784; P9BE-NEXT:    lxv v5, 0(r3)
785; P9BE-NEXT:    li r3, 6
786; P9BE-NEXT:    vextuhlx r3, r3, v2
787; P9BE-NEXT:    extsh r3, r3
788; P9BE-NEXT:    vperm v3, v4, v3, v5
789; P9BE-NEXT:    mulhw r4, r3, r4
790; P9BE-NEXT:    add r4, r4, r3
791; P9BE-NEXT:    srwi r5, r4, 31
792; P9BE-NEXT:    srawi r4, r4, 6
793; P9BE-NEXT:    add r4, r4, r5
794; P9BE-NEXT:    mulli r4, r4, 95
795; P9BE-NEXT:    sub r3, r3, r4
796; P9BE-NEXT:    mtvsrwz v4, r3
797; P9BE-NEXT:    li r3, 4
798; P9BE-NEXT:    vextuhlx r3, r3, v2
799; P9BE-NEXT:    extsh r3, r3
800; P9BE-NEXT:    srawi r4, r3, 3
801; P9BE-NEXT:    addze r4, r4
802; P9BE-NEXT:    slwi r4, r4, 3
803; P9BE-NEXT:    sub r3, r3, r4
804; P9BE-NEXT:    mtvsrwz v2, r3
805; P9BE-NEXT:    vperm v2, v2, v4, v5
806; P9BE-NEXT:    vmrghw v2, v3, v2
807; P9BE-NEXT:    blr
808;
809; P8LE-LABEL: dont_fold_srem_power_of_two:
810; P8LE:       # %bb.0:
811; P8LE-NEXT:    xxswapd vs0, v2
812; P8LE-NEXT:    lis r3, -21386
813; P8LE-NEXT:    ori r3, r3, 37253
814; P8LE-NEXT:    mffprd r4, f0
815; P8LE-NEXT:    rldicl r5, r4, 16, 48
816; P8LE-NEXT:    clrldi r6, r4, 48
817; P8LE-NEXT:    extsh r5, r5
818; P8LE-NEXT:    extsh r6, r6
819; P8LE-NEXT:    mulhw r3, r5, r3
820; P8LE-NEXT:    rldicl r7, r4, 48, 48
821; P8LE-NEXT:    srawi r8, r6, 6
822; P8LE-NEXT:    extsh r7, r7
823; P8LE-NEXT:    addze r8, r8
824; P8LE-NEXT:    rldicl r4, r4, 32, 48
825; P8LE-NEXT:    srawi r9, r7, 5
826; P8LE-NEXT:    extsh r4, r4
827; P8LE-NEXT:    slwi r8, r8, 6
828; P8LE-NEXT:    add r3, r3, r5
829; P8LE-NEXT:    addze r9, r9
830; P8LE-NEXT:    sub r6, r6, r8
831; P8LE-NEXT:    srwi r10, r3, 31
832; P8LE-NEXT:    srawi r3, r3, 6
833; P8LE-NEXT:    slwi r8, r9, 5
834; P8LE-NEXT:    mtvsrd v2, r6
835; P8LE-NEXT:    add r3, r3, r10
836; P8LE-NEXT:    srawi r9, r4, 3
837; P8LE-NEXT:    sub r6, r7, r8
838; P8LE-NEXT:    mulli r3, r3, 95
839; P8LE-NEXT:    addze r7, r9
840; P8LE-NEXT:    mtvsrd v3, r6
841; P8LE-NEXT:    vmrghh v2, v3, v2
842; P8LE-NEXT:    sub r3, r5, r3
843; P8LE-NEXT:    slwi r5, r7, 3
844; P8LE-NEXT:    sub r4, r4, r5
845; P8LE-NEXT:    mtvsrd v4, r3
846; P8LE-NEXT:    mtvsrd v5, r4
847; P8LE-NEXT:    vmrghh v3, v4, v5
848; P8LE-NEXT:    vmrglw v2, v3, v2
849; P8LE-NEXT:    blr
850;
851; P8BE-LABEL: dont_fold_srem_power_of_two:
852; P8BE:       # %bb.0:
853; P8BE-NEXT:    mfvsrd r4, v2
854; P8BE-NEXT:    lis r3, -21386
855; P8BE-NEXT:    ori r3, r3, 37253
856; P8BE-NEXT:    clrldi r5, r4, 48
857; P8BE-NEXT:    rldicl r6, r4, 32, 48
858; P8BE-NEXT:    extsh r5, r5
859; P8BE-NEXT:    extsh r6, r6
860; P8BE-NEXT:    mulhw r3, r5, r3
861; P8BE-NEXT:    rldicl r7, r4, 16, 48
862; P8BE-NEXT:    srawi r8, r6, 5
863; P8BE-NEXT:    extsh r7, r7
864; P8BE-NEXT:    addze r8, r8
865; P8BE-NEXT:    rldicl r4, r4, 48, 48
866; P8BE-NEXT:    srawi r9, r7, 6
867; P8BE-NEXT:    extsh r4, r4
868; P8BE-NEXT:    slwi r8, r8, 5
869; P8BE-NEXT:    add r3, r3, r5
870; P8BE-NEXT:    addze r9, r9
871; P8BE-NEXT:    sub r6, r6, r8
872; P8BE-NEXT:    srwi r10, r3, 31
873; P8BE-NEXT:    srawi r3, r3, 6
874; P8BE-NEXT:    slwi r8, r9, 6
875; P8BE-NEXT:    mtvsrwz v2, r6
876; P8BE-NEXT:    add r3, r3, r10
877; P8BE-NEXT:    srawi r9, r4, 3
878; P8BE-NEXT:    addis r10, r2, .LCPI3_0@toc@ha
879; P8BE-NEXT:    sub r6, r7, r8
880; P8BE-NEXT:    mulli r3, r3, 95
881; P8BE-NEXT:    addze r8, r9
882; P8BE-NEXT:    addi r7, r10, .LCPI3_0@toc@l
883; P8BE-NEXT:    mtvsrwz v4, r6
884; P8BE-NEXT:    lxvw4x v3, 0, r7
885; P8BE-NEXT:    sub r3, r5, r3
886; P8BE-NEXT:    slwi r5, r8, 3
887; P8BE-NEXT:    vperm v2, v4, v2, v3
888; P8BE-NEXT:    sub r4, r4, r5
889; P8BE-NEXT:    mtvsrwz v5, r3
890; P8BE-NEXT:    mtvsrwz v0, r4
891; P8BE-NEXT:    vperm v3, v0, v5, v3
892; P8BE-NEXT:    vmrghw v2, v2, v3
893; P8BE-NEXT:    blr
; IR under test: lanes 0-2 use power-of-two divisors (64, 32, 8); lane 3 uses 95.
; In the expected assembly above the power-of-two lanes are handled with
; srawi/addze/slwi/sub using shift amounts 6, 5 and 3, so no multiply is needed
; for them. Only the lane divided by 95 uses the mulhw/mulli sequence.
894  %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
895  ret <4 x i16> %1
896}
897
898; Don't fold if the divisor is one.
; Signed remainder of a <4 x i16> vector by the constant divisors
; <1, 654, 23, 5423>. In the expected output of every run line, the lane whose
; divisor is 1 is materialized as the constant 0 (an "li <reg>, 0"), while the
; other three lanes each use a scalar multiply-high sequence
; (mulhw, shifts, mulli, sub).
; The assertions in this function are autogenerated by the script named on the
; first line of this file; regenerate them instead of editing them by hand.
899define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
900; P9LE-LABEL: dont_fold_srem_one:
901; P9LE:       # %bb.0:
902; P9LE-NEXT:    li r3, 2
903; P9LE-NEXT:    lis r4, -14230
904; P9LE-NEXT:    vextuhrx r3, r3, v2
905; P9LE-NEXT:    ori r4, r4, 30865
906; P9LE-NEXT:    extsh r3, r3
907; P9LE-NEXT:    mulhw r4, r3, r4
908; P9LE-NEXT:    add r4, r4, r3
909; P9LE-NEXT:    srwi r5, r4, 31
910; P9LE-NEXT:    srawi r4, r4, 9
911; P9LE-NEXT:    add r4, r4, r5
912; P9LE-NEXT:    mulli r4, r4, 654
913; P9LE-NEXT:    sub r3, r3, r4
914; P9LE-NEXT:    lis r4, -19946
915; P9LE-NEXT:    mtvsrd v3, r3
916; P9LE-NEXT:    li r3, 0
917; P9LE-NEXT:    ori r4, r4, 17097
918; P9LE-NEXT:    mtvsrd v4, r3
919; P9LE-NEXT:    li r3, 4
920; P9LE-NEXT:    vextuhrx r3, r3, v2
921; P9LE-NEXT:    vmrghh v3, v3, v4
922; P9LE-NEXT:    extsh r3, r3
923; P9LE-NEXT:    mulhw r4, r3, r4
924; P9LE-NEXT:    add r4, r4, r3
925; P9LE-NEXT:    srwi r5, r4, 31
926; P9LE-NEXT:    srawi r4, r4, 4
927; P9LE-NEXT:    add r4, r4, r5
928; P9LE-NEXT:    mulli r4, r4, 23
929; P9LE-NEXT:    sub r3, r3, r4
930; P9LE-NEXT:    lis r4, 24749
931; P9LE-NEXT:    mtvsrd v4, r3
932; P9LE-NEXT:    li r3, 6
933; P9LE-NEXT:    ori r4, r4, 47143
934; P9LE-NEXT:    vextuhrx r3, r3, v2
935; P9LE-NEXT:    extsh r3, r3
936; P9LE-NEXT:    mulhw r4, r3, r4
937; P9LE-NEXT:    srwi r5, r4, 31
938; P9LE-NEXT:    srawi r4, r4, 11
939; P9LE-NEXT:    add r4, r4, r5
940; P9LE-NEXT:    mulli r4, r4, 5423
941; P9LE-NEXT:    sub r3, r3, r4
942; P9LE-NEXT:    mtvsrd v2, r3
943; P9LE-NEXT:    vmrghh v2, v2, v4
944; P9LE-NEXT:    vmrglw v2, v2, v3
945; P9LE-NEXT:    blr
946;
947; P9BE-LABEL: dont_fold_srem_one:
948; P9BE:       # %bb.0:
949; P9BE-NEXT:    li r3, 4
950; P9BE-NEXT:    lis r4, -19946
951; P9BE-NEXT:    vextuhlx r3, r3, v2
952; P9BE-NEXT:    ori r4, r4, 17097
953; P9BE-NEXT:    extsh r3, r3
954; P9BE-NEXT:    mulhw r4, r3, r4
955; P9BE-NEXT:    add r4, r4, r3
956; P9BE-NEXT:    srwi r5, r4, 31
957; P9BE-NEXT:    srawi r4, r4, 4
958; P9BE-NEXT:    add r4, r4, r5
959; P9BE-NEXT:    mulli r4, r4, 23
960; P9BE-NEXT:    sub r3, r3, r4
961; P9BE-NEXT:    lis r4, 24749
962; P9BE-NEXT:    mtvsrwz v3, r3
963; P9BE-NEXT:    li r3, 6
964; P9BE-NEXT:    ori r4, r4, 47143
965; P9BE-NEXT:    vextuhlx r3, r3, v2
966; P9BE-NEXT:    extsh r3, r3
967; P9BE-NEXT:    mulhw r4, r3, r4
968; P9BE-NEXT:    srwi r5, r4, 31
969; P9BE-NEXT:    srawi r4, r4, 11
970; P9BE-NEXT:    add r4, r4, r5
971; P9BE-NEXT:    mulli r4, r4, 5423
972; P9BE-NEXT:    sub r3, r3, r4
973; P9BE-NEXT:    lis r4, -14230
974; P9BE-NEXT:    mtvsrwz v4, r3
975; P9BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
976; P9BE-NEXT:    ori r4, r4, 30865
977; P9BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
978; P9BE-NEXT:    lxv v5, 0(r3)
979; P9BE-NEXT:    li r3, 2
980; P9BE-NEXT:    vextuhlx r3, r3, v2
981; P9BE-NEXT:    extsh r3, r3
982; P9BE-NEXT:    vperm v3, v3, v4, v5
983; P9BE-NEXT:    mulhw r4, r3, r4
984; P9BE-NEXT:    add r4, r4, r3
985; P9BE-NEXT:    srwi r5, r4, 31
986; P9BE-NEXT:    srawi r4, r4, 9
987; P9BE-NEXT:    add r4, r4, r5
988; P9BE-NEXT:    mulli r4, r4, 654
989; P9BE-NEXT:    sub r3, r3, r4
990; P9BE-NEXT:    mtvsrwz v2, r3
991; P9BE-NEXT:    li r3, 0
992; P9BE-NEXT:    mtvsrwz v4, r3
993; P9BE-NEXT:    vperm v2, v4, v2, v5
994; P9BE-NEXT:    vmrghw v2, v2, v3
995; P9BE-NEXT:    blr
996;
997; P8LE-LABEL: dont_fold_srem_one:
998; P8LE:       # %bb.0:
999; P8LE-NEXT:    xxswapd vs0, v2
1000; P8LE-NEXT:    lis r5, 24749
1001; P8LE-NEXT:    lis r6, -19946
1002; P8LE-NEXT:    lis r8, -14230
1003; P8LE-NEXT:    ori r5, r5, 47143
1004; P8LE-NEXT:    ori r6, r6, 17097
1005; P8LE-NEXT:    ori r8, r8, 30865
1006; P8LE-NEXT:    mffprd r3, f0
1007; P8LE-NEXT:    rldicl r4, r3, 16, 48
1008; P8LE-NEXT:    rldicl r7, r3, 32, 48
1009; P8LE-NEXT:    rldicl r3, r3, 48, 48
1010; P8LE-NEXT:    extsh r4, r4
1011; P8LE-NEXT:    extsh r7, r7
1012; P8LE-NEXT:    extsh r3, r3
1013; P8LE-NEXT:    mulhw r5, r4, r5
1014; P8LE-NEXT:    mulhw r6, r7, r6
1015; P8LE-NEXT:    mulhw r8, r3, r8
1016; P8LE-NEXT:    srwi r9, r5, 31
1017; P8LE-NEXT:    srawi r5, r5, 11
1018; P8LE-NEXT:    add r6, r6, r7
1019; P8LE-NEXT:    add r8, r8, r3
1020; P8LE-NEXT:    add r5, r5, r9
1021; P8LE-NEXT:    srwi r9, r6, 31
1022; P8LE-NEXT:    srawi r6, r6, 4
1023; P8LE-NEXT:    add r6, r6, r9
1024; P8LE-NEXT:    srwi r9, r8, 31
1025; P8LE-NEXT:    srawi r8, r8, 9
1026; P8LE-NEXT:    mulli r5, r5, 5423
1027; P8LE-NEXT:    add r8, r8, r9
1028; P8LE-NEXT:    mulli r6, r6, 23
1029; P8LE-NEXT:    li r9, 0
1030; P8LE-NEXT:    mulli r8, r8, 654
1031; P8LE-NEXT:    mtvsrd v2, r9
1032; P8LE-NEXT:    sub r4, r4, r5
1033; P8LE-NEXT:    sub r5, r7, r6
1034; P8LE-NEXT:    mtvsrd v3, r4
1035; P8LE-NEXT:    sub r3, r3, r8
1036; P8LE-NEXT:    mtvsrd v4, r5
1037; P8LE-NEXT:    mtvsrd v5, r3
1038; P8LE-NEXT:    vmrghh v3, v3, v4
1039; P8LE-NEXT:    vmrghh v2, v5, v2
1040; P8LE-NEXT:    vmrglw v2, v3, v2
1041; P8LE-NEXT:    blr
1042;
1043; P8BE-LABEL: dont_fold_srem_one:
1044; P8BE:       # %bb.0:
1045; P8BE-NEXT:    mfvsrd r4, v2
1046; P8BE-NEXT:    lis r3, 24749
1047; P8BE-NEXT:    lis r7, -19946
1048; P8BE-NEXT:    lis r8, -14230
1049; P8BE-NEXT:    ori r3, r3, 47143
1050; P8BE-NEXT:    ori r7, r7, 17097
1051; P8BE-NEXT:    ori r8, r8, 30865
1052; P8BE-NEXT:    clrldi r5, r4, 48
1053; P8BE-NEXT:    rldicl r6, r4, 48, 48
1054; P8BE-NEXT:    rldicl r4, r4, 32, 48
1055; P8BE-NEXT:    extsh r5, r5
1056; P8BE-NEXT:    extsh r6, r6
1057; P8BE-NEXT:    extsh r4, r4
1058; P8BE-NEXT:    mulhw r3, r5, r3
1059; P8BE-NEXT:    mulhw r7, r6, r7
1060; P8BE-NEXT:    mulhw r8, r4, r8
1061; P8BE-NEXT:    srawi r9, r3, 11
1062; P8BE-NEXT:    srwi r3, r3, 31
1063; P8BE-NEXT:    add r7, r7, r6
1064; P8BE-NEXT:    add r8, r8, r4
1065; P8BE-NEXT:    add r3, r9, r3
1066; P8BE-NEXT:    srwi r9, r7, 31
1067; P8BE-NEXT:    srawi r7, r7, 4
1068; P8BE-NEXT:    srawi r10, r8, 9
1069; P8BE-NEXT:    srwi r8, r8, 31
1070; P8BE-NEXT:    add r7, r7, r9
1071; P8BE-NEXT:    addis r9, r2, .LCPI4_0@toc@ha
1072; P8BE-NEXT:    mulli r3, r3, 5423
1073; P8BE-NEXT:    add r8, r10, r8
1074; P8BE-NEXT:    li r10, 0
1075; P8BE-NEXT:    mulli r7, r7, 23
1076; P8BE-NEXT:    mulli r8, r8, 654
1077; P8BE-NEXT:    mtvsrwz v2, r10
1078; P8BE-NEXT:    sub r3, r5, r3
1079; P8BE-NEXT:    addi r5, r9, .LCPI4_0@toc@l
1080; P8BE-NEXT:    lxvw4x v3, 0, r5
1081; P8BE-NEXT:    sub r5, r6, r7
1082; P8BE-NEXT:    mtvsrwz v4, r3
1083; P8BE-NEXT:    sub r3, r4, r8
1084; P8BE-NEXT:    mtvsrwz v5, r5
1085; P8BE-NEXT:    mtvsrwz v0, r3
1086; P8BE-NEXT:    vperm v4, v5, v4, v3
1087; P8BE-NEXT:    vperm v2, v2, v0, v3
1088; P8BE-NEXT:    vmrghw v2, v2, v4
1089; P8BE-NEXT:    blr
1090  %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
1091  ret <4 x i16> %1
1092}
1093
1094; Don't fold if the divisor is the i16 constant 32768 (bit pattern 0x8000, i.e. -2^15 when read as a signed i16).
; Signed remainder of a <4 x i16> vector by the constant divisors
; <1, 32768, 23, 5423>.
; NOTE(review): despite "urem" and "smax" in the function name, the body uses
; srem, and the i16 constant 32768 has bit pattern 0x8000, which is -32768 (the
; minimum signed i16) rather than the signed maximum. Presumably the name was
; carried over from an unsigned variant of this test - confirm before renaming;
; every LABEL assertion below depends on the current name.
; In the expected output of every run line, the 32768 lane is computed with a
; shift-based sequence (srawi 15, addze, slwi 15, sub) instead of a
; multiply-high, the divisor-1 lane is the constant 0, and the 23 and 5423
; lanes use mulhw.
1095define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
1096; P9LE-LABEL: dont_fold_urem_i16_smax:
1097; P9LE:       # %bb.0:
1098; P9LE-NEXT:    li r3, 4
1099; P9LE-NEXT:    lis r4, -19946
1100; P9LE-NEXT:    vextuhrx r3, r3, v2
1101; P9LE-NEXT:    ori r4, r4, 17097
1102; P9LE-NEXT:    extsh r3, r3
1103; P9LE-NEXT:    mulhw r4, r3, r4
1104; P9LE-NEXT:    add r4, r4, r3
1105; P9LE-NEXT:    srwi r5, r4, 31
1106; P9LE-NEXT:    srawi r4, r4, 4
1107; P9LE-NEXT:    add r4, r4, r5
1108; P9LE-NEXT:    mulli r4, r4, 23
1109; P9LE-NEXT:    sub r3, r3, r4
1110; P9LE-NEXT:    lis r4, 24749
1111; P9LE-NEXT:    mtvsrd v3, r3
1112; P9LE-NEXT:    li r3, 6
1113; P9LE-NEXT:    ori r4, r4, 47143
1114; P9LE-NEXT:    vextuhrx r3, r3, v2
1115; P9LE-NEXT:    extsh r3, r3
1116; P9LE-NEXT:    mulhw r4, r3, r4
1117; P9LE-NEXT:    srwi r5, r4, 31
1118; P9LE-NEXT:    srawi r4, r4, 11
1119; P9LE-NEXT:    add r4, r4, r5
1120; P9LE-NEXT:    mulli r4, r4, 5423
1121; P9LE-NEXT:    sub r3, r3, r4
1122; P9LE-NEXT:    mtvsrd v4, r3
1123; P9LE-NEXT:    li r3, 2
1124; P9LE-NEXT:    vextuhrx r3, r3, v2
1125; P9LE-NEXT:    vmrghh v3, v4, v3
1126; P9LE-NEXT:    extsh r3, r3
1127; P9LE-NEXT:    srawi r4, r3, 15
1128; P9LE-NEXT:    addze r4, r4
1129; P9LE-NEXT:    slwi r4, r4, 15
1130; P9LE-NEXT:    sub r3, r3, r4
1131; P9LE-NEXT:    mtvsrd v2, r3
1132; P9LE-NEXT:    li r3, 0
1133; P9LE-NEXT:    mtvsrd v4, r3
1134; P9LE-NEXT:    vmrghh v2, v2, v4
1135; P9LE-NEXT:    vmrglw v2, v3, v2
1136; P9LE-NEXT:    blr
1137;
1138; P9BE-LABEL: dont_fold_urem_i16_smax:
1139; P9BE:       # %bb.0:
1140; P9BE-NEXT:    li r3, 4
1141; P9BE-NEXT:    lis r4, -19946
1142; P9BE-NEXT:    vextuhlx r3, r3, v2
1143; P9BE-NEXT:    ori r4, r4, 17097
1144; P9BE-NEXT:    extsh r3, r3
1145; P9BE-NEXT:    mulhw r4, r3, r4
1146; P9BE-NEXT:    add r4, r4, r3
1147; P9BE-NEXT:    srwi r5, r4, 31
1148; P9BE-NEXT:    srawi r4, r4, 4
1149; P9BE-NEXT:    add r4, r4, r5
1150; P9BE-NEXT:    mulli r4, r4, 23
1151; P9BE-NEXT:    sub r3, r3, r4
1152; P9BE-NEXT:    lis r4, 24749
1153; P9BE-NEXT:    mtvsrwz v3, r3
1154; P9BE-NEXT:    li r3, 6
1155; P9BE-NEXT:    ori r4, r4, 47143
1156; P9BE-NEXT:    vextuhlx r3, r3, v2
1157; P9BE-NEXT:    extsh r3, r3
1158; P9BE-NEXT:    mulhw r4, r3, r4
1159; P9BE-NEXT:    srwi r5, r4, 31
1160; P9BE-NEXT:    srawi r4, r4, 11
1161; P9BE-NEXT:    add r4, r4, r5
1162; P9BE-NEXT:    mulli r4, r4, 5423
1163; P9BE-NEXT:    sub r3, r3, r4
1164; P9BE-NEXT:    mtvsrwz v4, r3
1165; P9BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
1166; P9BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
1167; P9BE-NEXT:    lxv v5, 0(r3)
1168; P9BE-NEXT:    li r3, 2
1169; P9BE-NEXT:    vextuhlx r3, r3, v2
1170; P9BE-NEXT:    extsh r3, r3
1171; P9BE-NEXT:    vperm v3, v3, v4, v5
1172; P9BE-NEXT:    srawi r4, r3, 15
1173; P9BE-NEXT:    addze r4, r4
1174; P9BE-NEXT:    slwi r4, r4, 15
1175; P9BE-NEXT:    sub r3, r3, r4
1176; P9BE-NEXT:    mtvsrwz v2, r3
1177; P9BE-NEXT:    li r3, 0
1178; P9BE-NEXT:    mtvsrwz v4, r3
1179; P9BE-NEXT:    vperm v2, v4, v2, v5
1180; P9BE-NEXT:    vmrghw v2, v2, v3
1181; P9BE-NEXT:    blr
1182;
1183; P8LE-LABEL: dont_fold_urem_i16_smax:
1184; P8LE:       # %bb.0:
1185; P8LE-NEXT:    xxswapd vs0, v2
1186; P8LE-NEXT:    lis r4, 24749
1187; P8LE-NEXT:    lis r5, -19946
1188; P8LE-NEXT:    ori r4, r4, 47143
1189; P8LE-NEXT:    ori r5, r5, 17097
1190; P8LE-NEXT:    mffprd r3, f0
1191; P8LE-NEXT:    rldicl r6, r3, 16, 48
1192; P8LE-NEXT:    rldicl r7, r3, 32, 48
1193; P8LE-NEXT:    extsh r6, r6
1194; P8LE-NEXT:    extsh r7, r7
1195; P8LE-NEXT:    mulhw r4, r6, r4
1196; P8LE-NEXT:    mulhw r5, r7, r5
1197; P8LE-NEXT:    rldicl r3, r3, 48, 48
1198; P8LE-NEXT:    extsh r3, r3
1199; P8LE-NEXT:    srwi r8, r4, 31
1200; P8LE-NEXT:    srawi r4, r4, 11
1201; P8LE-NEXT:    add r5, r5, r7
1202; P8LE-NEXT:    add r4, r4, r8
1203; P8LE-NEXT:    srwi r8, r5, 31
1204; P8LE-NEXT:    srawi r5, r5, 4
1205; P8LE-NEXT:    mulli r4, r4, 5423
1206; P8LE-NEXT:    add r5, r5, r8
1207; P8LE-NEXT:    srawi r9, r3, 15
1208; P8LE-NEXT:    li r8, 0
1209; P8LE-NEXT:    mulli r5, r5, 23
1210; P8LE-NEXT:    mtvsrd v2, r8
1211; P8LE-NEXT:    sub r4, r6, r4
1212; P8LE-NEXT:    addze r6, r9
1213; P8LE-NEXT:    slwi r6, r6, 15
1214; P8LE-NEXT:    mtvsrd v3, r4
1215; P8LE-NEXT:    sub r5, r7, r5
1216; P8LE-NEXT:    sub r3, r3, r6
1217; P8LE-NEXT:    mtvsrd v4, r5
1218; P8LE-NEXT:    mtvsrd v5, r3
1219; P8LE-NEXT:    vmrghh v3, v3, v4
1220; P8LE-NEXT:    vmrghh v2, v5, v2
1221; P8LE-NEXT:    vmrglw v2, v3, v2
1222; P8LE-NEXT:    blr
1223;
1224; P8BE-LABEL: dont_fold_urem_i16_smax:
1225; P8BE:       # %bb.0:
1226; P8BE-NEXT:    mfvsrd r3, v2
1227; P8BE-NEXT:    lis r4, 24749
1228; P8BE-NEXT:    lis r5, -19946
1229; P8BE-NEXT:    li r9, 0
1230; P8BE-NEXT:    ori r4, r4, 47143
1231; P8BE-NEXT:    ori r5, r5, 17097
1232; P8BE-NEXT:    mtvsrwz v2, r9
1233; P8BE-NEXT:    clrldi r6, r3, 48
1234; P8BE-NEXT:    rldicl r7, r3, 48, 48
1235; P8BE-NEXT:    extsh r6, r6
1236; P8BE-NEXT:    extsh r7, r7
1237; P8BE-NEXT:    mulhw r4, r6, r4
1238; P8BE-NEXT:    mulhw r5, r7, r5
1239; P8BE-NEXT:    rldicl r3, r3, 32, 48
1240; P8BE-NEXT:    extsh r3, r3
1241; P8BE-NEXT:    srwi r8, r4, 31
1242; P8BE-NEXT:    srawi r4, r4, 11
1243; P8BE-NEXT:    add r5, r5, r7
1244; P8BE-NEXT:    add r4, r4, r8
1245; P8BE-NEXT:    srwi r8, r5, 31
1246; P8BE-NEXT:    srawi r5, r5, 4
1247; P8BE-NEXT:    mulli r4, r4, 5423
1248; P8BE-NEXT:    add r5, r5, r8
1249; P8BE-NEXT:    addis r8, r2, .LCPI5_0@toc@ha
1250; P8BE-NEXT:    srawi r10, r3, 15
1251; P8BE-NEXT:    mulli r5, r5, 23
1252; P8BE-NEXT:    sub r4, r6, r4
1253; P8BE-NEXT:    addi r6, r8, .LCPI5_0@toc@l
1254; P8BE-NEXT:    addze r8, r10
1255; P8BE-NEXT:    lxvw4x v3, 0, r6
1256; P8BE-NEXT:    slwi r6, r8, 15
1257; P8BE-NEXT:    mtvsrwz v4, r4
1258; P8BE-NEXT:    sub r5, r7, r5
1259; P8BE-NEXT:    sub r3, r3, r6
1260; P8BE-NEXT:    mtvsrwz v5, r5
1261; P8BE-NEXT:    mtvsrwz v0, r3
1262; P8BE-NEXT:    vperm v4, v5, v4, v3
1263; P8BE-NEXT:    vperm v2, v2, v0, v3
1264; P8BE-NEXT:    vmrghw v2, v2, v4
1265; P8BE-NEXT:    blr
1266  %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
1267  ret <4 x i16> %1
1268}
1269
1270; Don't fold i64 srem.
; Signed remainder of a <4 x i64> vector by the constant divisors
; <1, 654, 23, 5423>. The expected output of every run line reads the input
; from v2 and v3, handles each of the three non-trivial lanes with a scalar
; 64-bit multiply-high (mulhd) followed by shifts, mulli and sub, and fills the
; divisor-1 lane with the constant 0.
; The assertions in this function are autogenerated by the script named on the
; first line of this file; regenerate them instead of editing them by hand.
1271define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) {
1272; P9LE-LABEL: dont_fold_srem_i64:
1273; P9LE:       # %bb.0:
1274; P9LE-NEXT:    lis r4, 12374
1275; P9LE-NEXT:    mfvsrd r3, v3
1276; P9LE-NEXT:    ori r4, r4, 56339
1277; P9LE-NEXT:    rldic r4, r4, 33, 1
1278; P9LE-NEXT:    oris r4, r4, 58853
1279; P9LE-NEXT:    ori r4, r4, 6055
1280; P9LE-NEXT:    mulhd r4, r3, r4
1281; P9LE-NEXT:    rldicl r5, r4, 1, 63
1282; P9LE-NEXT:    sradi r4, r4, 11
1283; P9LE-NEXT:    add r4, r4, r5
1284; P9LE-NEXT:    lis r5, 5698
1285; P9LE-NEXT:    mulli r4, r4, 5423
1286; P9LE-NEXT:    ori r5, r5, 51289
1287; P9LE-NEXT:    rldic r5, r5, 35, 0
1288; P9LE-NEXT:    oris r5, r5, 22795
1289; P9LE-NEXT:    sub r3, r3, r4
1290; P9LE-NEXT:    mfvsrld r4, v3
1291; P9LE-NEXT:    ori r5, r5, 8549
1292; P9LE-NEXT:    mulhd r5, r4, r5
1293; P9LE-NEXT:    add r5, r5, r4
1294; P9LE-NEXT:    rldicl r6, r5, 1, 63
1295; P9LE-NEXT:    sradi r5, r5, 4
1296; P9LE-NEXT:    add r5, r5, r6
1297; P9LE-NEXT:    mulli r5, r5, 23
1298; P9LE-NEXT:    sub r4, r4, r5
1299; P9LE-NEXT:    mtvsrdd v3, r3, r4
1300; P9LE-NEXT:    lis r4, 3206
1301; P9LE-NEXT:    mfvsrd r3, v2
1302; P9LE-NEXT:    ori r4, r4, 42889
1303; P9LE-NEXT:    rldic r4, r4, 35, 1
1304; P9LE-NEXT:    oris r4, r4, 1603
1305; P9LE-NEXT:    ori r4, r4, 21445
1306; P9LE-NEXT:    mulhd r4, r3, r4
1307; P9LE-NEXT:    rldicl r5, r4, 1, 63
1308; P9LE-NEXT:    sradi r4, r4, 8
1309; P9LE-NEXT:    add r4, r4, r5
1310; P9LE-NEXT:    mulli r4, r4, 654
1311; P9LE-NEXT:    sub r3, r3, r4
1312; P9LE-NEXT:    li r4, 0
1313; P9LE-NEXT:    mtvsrdd v2, r3, r4
1314; P9LE-NEXT:    blr
1315;
1316; P9BE-LABEL: dont_fold_srem_i64:
1317; P9BE:       # %bb.0:
1318; P9BE-NEXT:    lis r4, 12374
1319; P9BE-NEXT:    mfvsrld r3, v3
1320; P9BE-NEXT:    ori r4, r4, 56339
1321; P9BE-NEXT:    rldic r4, r4, 33, 1
1322; P9BE-NEXT:    oris r4, r4, 58853
1323; P9BE-NEXT:    ori r4, r4, 6055
1324; P9BE-NEXT:    mulhd r4, r3, r4
1325; P9BE-NEXT:    rldicl r5, r4, 1, 63
1326; P9BE-NEXT:    sradi r4, r4, 11
1327; P9BE-NEXT:    add r4, r4, r5
1328; P9BE-NEXT:    lis r5, 5698
1329; P9BE-NEXT:    ori r5, r5, 51289
1330; P9BE-NEXT:    mulli r4, r4, 5423
1331; P9BE-NEXT:    rldic r5, r5, 35, 0
1332; P9BE-NEXT:    oris r5, r5, 22795
1333; P9BE-NEXT:    sub r3, r3, r4
1334; P9BE-NEXT:    mfvsrd r4, v3
1335; P9BE-NEXT:    ori r5, r5, 8549
1336; P9BE-NEXT:    mulhd r5, r4, r5
1337; P9BE-NEXT:    add r5, r5, r4
1338; P9BE-NEXT:    rldicl r6, r5, 1, 63
1339; P9BE-NEXT:    sradi r5, r5, 4
1340; P9BE-NEXT:    add r5, r5, r6
1341; P9BE-NEXT:    mulli r5, r5, 23
1342; P9BE-NEXT:    sub r4, r4, r5
1343; P9BE-NEXT:    mtvsrdd v3, r4, r3
1344; P9BE-NEXT:    lis r4, 3206
1345; P9BE-NEXT:    mfvsrld r3, v2
1346; P9BE-NEXT:    ori r4, r4, 42889
1347; P9BE-NEXT:    rldic r4, r4, 35, 1
1348; P9BE-NEXT:    oris r4, r4, 1603
1349; P9BE-NEXT:    ori r4, r4, 21445
1350; P9BE-NEXT:    mulhd r4, r3, r4
1351; P9BE-NEXT:    rldicl r5, r4, 1, 63
1352; P9BE-NEXT:    sradi r4, r4, 8
1353; P9BE-NEXT:    add r4, r4, r5
1354; P9BE-NEXT:    mulli r4, r4, 654
1355; P9BE-NEXT:    sub r3, r3, r4
1356; P9BE-NEXT:    mtvsrdd v2, 0, r3
1357; P9BE-NEXT:    blr
1358;
1359; P8LE-LABEL: dont_fold_srem_i64:
1360; P8LE:       # %bb.0:
1361; P8LE-NEXT:    lis r3, 12374
1362; P8LE-NEXT:    lis r4, 5698
1363; P8LE-NEXT:    lis r5, 3206
1364; P8LE-NEXT:    xxswapd vs0, v3
1365; P8LE-NEXT:    mfvsrd r6, v3
1366; P8LE-NEXT:    ori r3, r3, 56339
1367; P8LE-NEXT:    ori r4, r4, 51289
1368; P8LE-NEXT:    ori r5, r5, 42889
1369; P8LE-NEXT:    mfvsrd r7, v2
1370; P8LE-NEXT:    rldic r3, r3, 33, 1
1371; P8LE-NEXT:    rldic r4, r4, 35, 0
1372; P8LE-NEXT:    rldic r5, r5, 35, 1
1373; P8LE-NEXT:    oris r3, r3, 58853
1374; P8LE-NEXT:    oris r4, r4, 22795
1375; P8LE-NEXT:    mffprd r8, f0
1376; P8LE-NEXT:    oris r5, r5, 1603
1377; P8LE-NEXT:    ori r3, r3, 6055
1378; P8LE-NEXT:    ori r4, r4, 8549
1379; P8LE-NEXT:    ori r5, r5, 21445
1380; P8LE-NEXT:    mulhd r3, r6, r3
1381; P8LE-NEXT:    mulhd r5, r7, r5
1382; P8LE-NEXT:    mulhd r4, r8, r4
1383; P8LE-NEXT:    rldicl r9, r3, 1, 63
1384; P8LE-NEXT:    sradi r3, r3, 11
1385; P8LE-NEXT:    add r3, r3, r9
1386; P8LE-NEXT:    rldicl r9, r5, 1, 63
1387; P8LE-NEXT:    add r4, r4, r8
1388; P8LE-NEXT:    sradi r5, r5, 8
1389; P8LE-NEXT:    mulli r3, r3, 5423
1390; P8LE-NEXT:    add r5, r5, r9
1391; P8LE-NEXT:    rldicl r9, r4, 1, 63
1392; P8LE-NEXT:    sradi r4, r4, 4
1393; P8LE-NEXT:    mulli r5, r5, 654
1394; P8LE-NEXT:    add r4, r4, r9
1395; P8LE-NEXT:    mulli r4, r4, 23
1396; P8LE-NEXT:    sub r3, r6, r3
1397; P8LE-NEXT:    mtfprd f0, r3
1398; P8LE-NEXT:    sub r5, r7, r5
1399; P8LE-NEXT:    mtfprd f1, r5
1400; P8LE-NEXT:    sub r3, r8, r4
1401; P8LE-NEXT:    li r4, 0
1402; P8LE-NEXT:    mtfprd f2, r3
1403; P8LE-NEXT:    mtfprd f3, r4
1404; P8LE-NEXT:    xxmrghd v3, vs0, vs2
1405; P8LE-NEXT:    xxmrghd v2, vs1, vs3
1406; P8LE-NEXT:    blr
1407;
1408; P8BE-LABEL: dont_fold_srem_i64:
1409; P8BE:       # %bb.0:
1410; P8BE-NEXT:    lis r4, 5698
1411; P8BE-NEXT:    lis r3, 12374
1412; P8BE-NEXT:    xxswapd vs0, v3
1413; P8BE-NEXT:    lis r5, 3206
1414; P8BE-NEXT:    xxswapd vs1, v2
1415; P8BE-NEXT:    ori r4, r4, 51289
1416; P8BE-NEXT:    ori r3, r3, 56339
1417; P8BE-NEXT:    ori r5, r5, 42889
1418; P8BE-NEXT:    mfvsrd r6, v3
1419; P8BE-NEXT:    rldic r4, r4, 35, 0
1420; P8BE-NEXT:    rldic r3, r3, 33, 1
1421; P8BE-NEXT:    oris r4, r4, 22795
1422; P8BE-NEXT:    rldic r5, r5, 35, 1
1423; P8BE-NEXT:    oris r3, r3, 58853
1424; P8BE-NEXT:    mffprd r7, f0
1425; P8BE-NEXT:    ori r4, r4, 8549
1426; P8BE-NEXT:    ori r3, r3, 6055
1427; P8BE-NEXT:    oris r5, r5, 1603
1428; P8BE-NEXT:    mffprd r8, f1
1429; P8BE-NEXT:    mulhd r4, r6, r4
1430; P8BE-NEXT:    mulhd r3, r7, r3
1431; P8BE-NEXT:    ori r5, r5, 21445
1432; P8BE-NEXT:    mulhd r5, r8, r5
1433; P8BE-NEXT:    add r4, r4, r6
1434; P8BE-NEXT:    rldicl r9, r3, 1, 63
1435; P8BE-NEXT:    sradi r3, r3, 11
1436; P8BE-NEXT:    rldicl r10, r4, 1, 63
1437; P8BE-NEXT:    sradi r4, r4, 4
1438; P8BE-NEXT:    add r3, r3, r9
1439; P8BE-NEXT:    rldicl r9, r5, 1, 63
1440; P8BE-NEXT:    add r4, r4, r10
1441; P8BE-NEXT:    sradi r5, r5, 8
1442; P8BE-NEXT:    mulli r3, r3, 5423
1443; P8BE-NEXT:    add r5, r5, r9
1444; P8BE-NEXT:    mulli r4, r4, 23
1445; P8BE-NEXT:    mulli r5, r5, 654
1446; P8BE-NEXT:    sub r3, r7, r3
1447; P8BE-NEXT:    sub r4, r6, r4
1448; P8BE-NEXT:    mtfprd f0, r3
1449; P8BE-NEXT:    sub r3, r8, r5
1450; P8BE-NEXT:    mtfprd f1, r4
1451; P8BE-NEXT:    li r4, 0
1452; P8BE-NEXT:    mtfprd f2, r3
1453; P8BE-NEXT:    mtfprd f3, r4
1454; P8BE-NEXT:    xxmrghd v3, vs1, vs0
1455; P8BE-NEXT:    xxmrghd v2, vs3, vs2
1456; P8BE-NEXT:    blr
1457  %1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
1458  ret <4 x i64> %1
1459}
1460