; Codegen test for srem on <4 x i16> with constant divisor vectors. The RUN
; lines below invoke llc for pwr9 and pwr8 on powerpc64le and powerpc64 and
; check the output under the FileCheck prefixes P9LE, P9BE, P8LE and P8BE.
; The assertion lines were produced by utils/update_llc_test_checks.py (see
; the NOTE line); presumably they should be regenerated with that script
; rather than edited by hand.
;
; Functions covered by this part of the file:
;   fold_srem_vec_1   - srem by <95, -124, 98, -1003>. Each lane has its own
;                       constant; the expected code is a per-lane
;                       mulhw / shift / mulli / sub sequence.
;   fold_srem_vec_2   - srem by a splat of 95. All four lanes reuse the same
;                       multiplier (lis -21386 / ori 37253) and mulli 95.
;   combine_srem_sdiv - srem and sdiv of the same value by a splat of 95,
;                       added together. The expected code keeps each lane's
;                       quotient, uses it for mulli 95 / sub to form the
;                       remainder, and also feeds it to the final vadduhm.
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ 3; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE 4; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ 5; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE 6; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ 7; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE 8; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ 9; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE 10 11define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { 12; P9LE-LABEL: fold_srem_vec_1: 13; P9LE: # %bb.0: 14; P9LE-NEXT: li r3, 0 15; P9LE-NEXT: lis r4, -21386 16; P9LE-NEXT: vextuhrx r3, r3, v2 17; P9LE-NEXT: ori r4, r4, 37253 18; P9LE-NEXT: extsh r3, r3 19; P9LE-NEXT: mulhw r4, r3, r4 20; P9LE-NEXT: add r4, r4, r3 21; P9LE-NEXT: srwi r5, r4, 31 22; P9LE-NEXT: srawi r4, r4, 6 23; P9LE-NEXT: add r4, r4, r5 24; P9LE-NEXT: mulli r4, r4, 95 25; P9LE-NEXT: sub r3, r3, r4 26; P9LE-NEXT: lis r4, 31710 27; P9LE-NEXT: mtvsrd v3, r3 28; P9LE-NEXT: li r3, 2 29; P9LE-NEXT: ori r4, r4, 63421 30; P9LE-NEXT: vextuhrx r3, r3, v2 31; P9LE-NEXT: extsh r3, r3 32; P9LE-NEXT: mulhw r4, r3, r4 33; P9LE-NEXT: sub r4, r4, r3 34; P9LE-NEXT: srwi r5, r4, 31 35; P9LE-NEXT: srawi r4, r4, 6 36; P9LE-NEXT: add r4, r4, r5 37; P9LE-NEXT: mulli r4, r4, -124 38; P9LE-NEXT: sub r3, r3, r4 39; P9LE-NEXT: lis r4, 21399 40; P9LE-NEXT: mtvsrd v4, r3 41; P9LE-NEXT: li r3, 4 42; P9LE-NEXT: ori r4, r4, 33437 43; P9LE-NEXT: vextuhrx r3, r3, v2 44; P9LE-NEXT: vmrghh v3, v4, v3 45; P9LE-NEXT: extsh r3, r3 46; P9LE-NEXT: mulhw r4, r3, r4 47; P9LE-NEXT: srwi r5, r4, 31 48; P9LE-NEXT: srawi r4, r4, 5 49; P9LE-NEXT: add r4, r4, r5 50; 
P9LE-NEXT: mulli r4, r4, 98 51; P9LE-NEXT: sub r3, r3, r4 52; P9LE-NEXT: lis r4, -16728 53; P9LE-NEXT: mtvsrd v4, r3 54; P9LE-NEXT: li r3, 6 55; P9LE-NEXT: ori r4, r4, 63249 56; P9LE-NEXT: vextuhrx r3, r3, v2 57; P9LE-NEXT: extsh r3, r3 58; P9LE-NEXT: mulhw r4, r3, r4 59; P9LE-NEXT: srwi r5, r4, 31 60; P9LE-NEXT: srawi r4, r4, 8 61; P9LE-NEXT: add r4, r4, r5 62; P9LE-NEXT: mulli r4, r4, -1003 63; P9LE-NEXT: sub r3, r3, r4 64; P9LE-NEXT: mtvsrd v2, r3 65; P9LE-NEXT: vmrghh v2, v2, v4 66; P9LE-NEXT: vmrglw v2, v2, v3 67; P9LE-NEXT: blr 68; 69; P9BE-LABEL: fold_srem_vec_1: 70; P9BE: # %bb.0: 71; P9BE-NEXT: li r3, 2 72; P9BE-NEXT: lis r4, 31710 73; P9BE-NEXT: vextuhlx r3, r3, v2 74; P9BE-NEXT: ori r4, r4, 63421 75; P9BE-NEXT: extsh r3, r3 76; P9BE-NEXT: mulhw r4, r3, r4 77; P9BE-NEXT: sub r4, r4, r3 78; P9BE-NEXT: srwi r5, r4, 31 79; P9BE-NEXT: srawi r4, r4, 6 80; P9BE-NEXT: add r4, r4, r5 81; P9BE-NEXT: mulli r4, r4, -124 82; P9BE-NEXT: sub r3, r3, r4 83; P9BE-NEXT: lis r4, -21386 84; P9BE-NEXT: mtvsrwz v3, r3 85; P9BE-NEXT: li r3, 0 86; P9BE-NEXT: ori r4, r4, 37253 87; P9BE-NEXT: vextuhlx r3, r3, v2 88; P9BE-NEXT: extsh r3, r3 89; P9BE-NEXT: mulhw r4, r3, r4 90; P9BE-NEXT: add r4, r4, r3 91; P9BE-NEXT: srwi r5, r4, 31 92; P9BE-NEXT: srawi r4, r4, 6 93; P9BE-NEXT: add r4, r4, r5 94; P9BE-NEXT: mulli r4, r4, 95 95; P9BE-NEXT: sub r3, r3, r4 96; P9BE-NEXT: lis r4, -16728 97; P9BE-NEXT: mtvsrwz v4, r3 98; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha 99; P9BE-NEXT: ori r4, r4, 63249 100; P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l 101; P9BE-NEXT: lxv v5, 0(r3) 102; P9BE-NEXT: li r3, 6 103; P9BE-NEXT: vextuhlx r3, r3, v2 104; P9BE-NEXT: extsh r3, r3 105; P9BE-NEXT: vperm v3, v4, v3, v5 106; P9BE-NEXT: mulhw r4, r3, r4 107; P9BE-NEXT: srwi r5, r4, 31 108; P9BE-NEXT: srawi r4, r4, 8 109; P9BE-NEXT: add r4, r4, r5 110; P9BE-NEXT: mulli r4, r4, -1003 111; P9BE-NEXT: sub r3, r3, r4 112; P9BE-NEXT: lis r4, 21399 113; P9BE-NEXT: mtvsrwz v4, r3 114; P9BE-NEXT: li r3, 4 115; P9BE-NEXT: ori 
r4, r4, 33437 116; P9BE-NEXT: vextuhlx r3, r3, v2 117; P9BE-NEXT: extsh r3, r3 118; P9BE-NEXT: mulhw r4, r3, r4 119; P9BE-NEXT: srwi r5, r4, 31 120; P9BE-NEXT: srawi r4, r4, 5 121; P9BE-NEXT: add r4, r4, r5 122; P9BE-NEXT: mulli r4, r4, 98 123; P9BE-NEXT: sub r3, r3, r4 124; P9BE-NEXT: mtvsrwz v2, r3 125; P9BE-NEXT: vperm v2, v2, v4, v5 126; P9BE-NEXT: vmrghw v2, v3, v2 127; P9BE-NEXT: blr 128; 129; P8LE-LABEL: fold_srem_vec_1: 130; P8LE: # %bb.0: 131; P8LE-NEXT: xxswapd vs0, v2 132; P8LE-NEXT: lis r3, 21399 133; P8LE-NEXT: lis r8, -16728 134; P8LE-NEXT: lis r9, -21386 135; P8LE-NEXT: lis r10, 31710 136; P8LE-NEXT: ori r3, r3, 33437 137; P8LE-NEXT: ori r8, r8, 63249 138; P8LE-NEXT: ori r9, r9, 37253 139; P8LE-NEXT: ori r10, r10, 63421 140; P8LE-NEXT: mffprd r4, f0 141; P8LE-NEXT: rldicl r5, r4, 32, 48 142; P8LE-NEXT: rldicl r6, r4, 16, 48 143; P8LE-NEXT: clrldi r7, r4, 48 144; P8LE-NEXT: extsh r5, r5 145; P8LE-NEXT: extsh r6, r6 146; P8LE-NEXT: rldicl r4, r4, 48, 48 147; P8LE-NEXT: extsh r7, r7 148; P8LE-NEXT: mulhw r3, r5, r3 149; P8LE-NEXT: extsh r4, r4 150; P8LE-NEXT: mulhw r8, r6, r8 151; P8LE-NEXT: mulhw r9, r7, r9 152; P8LE-NEXT: mulhw r10, r4, r10 153; P8LE-NEXT: srwi r11, r3, 31 154; P8LE-NEXT: srawi r3, r3, 5 155; P8LE-NEXT: add r3, r3, r11 156; P8LE-NEXT: srwi r11, r8, 31 157; P8LE-NEXT: add r9, r9, r7 158; P8LE-NEXT: srawi r8, r8, 8 159; P8LE-NEXT: sub r10, r10, r4 160; P8LE-NEXT: add r8, r8, r11 161; P8LE-NEXT: srwi r11, r9, 31 162; P8LE-NEXT: srawi r9, r9, 6 163; P8LE-NEXT: mulli r3, r3, 98 164; P8LE-NEXT: add r9, r9, r11 165; P8LE-NEXT: srwi r11, r10, 31 166; P8LE-NEXT: srawi r10, r10, 6 167; P8LE-NEXT: mulli r8, r8, -1003 168; P8LE-NEXT: add r10, r10, r11 169; P8LE-NEXT: mulli r9, r9, 95 170; P8LE-NEXT: mulli r10, r10, -124 171; P8LE-NEXT: sub r3, r5, r3 172; P8LE-NEXT: mtvsrd v2, r3 173; P8LE-NEXT: sub r5, r6, r8 174; P8LE-NEXT: sub r3, r7, r9 175; P8LE-NEXT: mtvsrd v3, r5 176; P8LE-NEXT: sub r4, r4, r10 177; P8LE-NEXT: mtvsrd v4, r3 178; P8LE-NEXT: 
mtvsrd v5, r4 179; P8LE-NEXT: vmrghh v2, v3, v2 180; P8LE-NEXT: vmrghh v3, v5, v4 181; P8LE-NEXT: vmrglw v2, v2, v3 182; P8LE-NEXT: blr 183; 184; P8BE-LABEL: fold_srem_vec_1: 185; P8BE: # %bb.0: 186; P8BE-NEXT: mfvsrd r4, v2 187; P8BE-NEXT: lis r3, -16728 188; P8BE-NEXT: lis r8, 21399 189; P8BE-NEXT: lis r9, 31710 190; P8BE-NEXT: lis r10, -21386 191; P8BE-NEXT: ori r3, r3, 63249 192; P8BE-NEXT: ori r8, r8, 33437 193; P8BE-NEXT: ori r9, r9, 63421 194; P8BE-NEXT: ori r10, r10, 37253 195; P8BE-NEXT: clrldi r5, r4, 48 196; P8BE-NEXT: rldicl r6, r4, 48, 48 197; P8BE-NEXT: rldicl r7, r4, 32, 48 198; P8BE-NEXT: extsh r5, r5 199; P8BE-NEXT: extsh r6, r6 200; P8BE-NEXT: rldicl r4, r4, 16, 48 201; P8BE-NEXT: extsh r7, r7 202; P8BE-NEXT: mulhw r3, r5, r3 203; P8BE-NEXT: extsh r4, r4 204; P8BE-NEXT: mulhw r8, r6, r8 205; P8BE-NEXT: mulhw r9, r7, r9 206; P8BE-NEXT: mulhw r10, r4, r10 207; P8BE-NEXT: srwi r11, r3, 31 208; P8BE-NEXT: srawi r3, r3, 8 209; P8BE-NEXT: add r3, r3, r11 210; P8BE-NEXT: srwi r11, r8, 31 211; P8BE-NEXT: sub r9, r9, r7 212; P8BE-NEXT: srawi r8, r8, 5 213; P8BE-NEXT: add r10, r10, r4 214; P8BE-NEXT: add r8, r8, r11 215; P8BE-NEXT: srwi r11, r9, 31 216; P8BE-NEXT: srawi r9, r9, 6 217; P8BE-NEXT: mulli r3, r3, -1003 218; P8BE-NEXT: add r9, r9, r11 219; P8BE-NEXT: srwi r11, r10, 31 220; P8BE-NEXT: srawi r10, r10, 6 221; P8BE-NEXT: mulli r8, r8, 98 222; P8BE-NEXT: add r10, r10, r11 223; P8BE-NEXT: mulli r9, r9, -124 224; P8BE-NEXT: mulli r10, r10, 95 225; P8BE-NEXT: sub r3, r5, r3 226; P8BE-NEXT: addis r5, r2, .LCPI0_0@toc@ha 227; P8BE-NEXT: mtvsrwz v2, r3 228; P8BE-NEXT: addi r3, r5, .LCPI0_0@toc@l 229; P8BE-NEXT: sub r6, r6, r8 230; P8BE-NEXT: lxvw4x v3, 0, r3 231; P8BE-NEXT: sub r3, r7, r9 232; P8BE-NEXT: mtvsrwz v4, r6 233; P8BE-NEXT: sub r4, r4, r10 234; P8BE-NEXT: mtvsrwz v5, r3 235; P8BE-NEXT: mtvsrwz v0, r4 236; P8BE-NEXT: vperm v2, v4, v2, v3 237; P8BE-NEXT: vperm v3, v0, v5, v3 238; P8BE-NEXT: vmrghw v2, v3, v2 239; P8BE-NEXT: blr 240 %1 = srem <4 x 
i16> %x, <i16 95, i16 -124, i16 98, i16 -1003> 241 ret <4 x i16> %1 242} 243 244define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { 245; P9LE-LABEL: fold_srem_vec_2: 246; P9LE: # %bb.0: 247; P9LE-NEXT: li r3, 0 248; P9LE-NEXT: lis r4, -21386 249; P9LE-NEXT: vextuhrx r3, r3, v2 250; P9LE-NEXT: ori r4, r4, 37253 251; P9LE-NEXT: extsh r3, r3 252; P9LE-NEXT: mulhw r5, r3, r4 253; P9LE-NEXT: add r5, r5, r3 254; P9LE-NEXT: srwi r6, r5, 31 255; P9LE-NEXT: srawi r5, r5, 6 256; P9LE-NEXT: add r5, r5, r6 257; P9LE-NEXT: mulli r5, r5, 95 258; P9LE-NEXT: sub r3, r3, r5 259; P9LE-NEXT: mtvsrd v3, r3 260; P9LE-NEXT: li r3, 2 261; P9LE-NEXT: vextuhrx r3, r3, v2 262; P9LE-NEXT: extsh r3, r3 263; P9LE-NEXT: mulhw r5, r3, r4 264; P9LE-NEXT: add r5, r5, r3 265; P9LE-NEXT: srwi r6, r5, 31 266; P9LE-NEXT: srawi r5, r5, 6 267; P9LE-NEXT: add r5, r5, r6 268; P9LE-NEXT: mulli r5, r5, 95 269; P9LE-NEXT: sub r3, r3, r5 270; P9LE-NEXT: mtvsrd v4, r3 271; P9LE-NEXT: li r3, 4 272; P9LE-NEXT: vextuhrx r3, r3, v2 273; P9LE-NEXT: vmrghh v3, v4, v3 274; P9LE-NEXT: extsh r3, r3 275; P9LE-NEXT: mulhw r5, r3, r4 276; P9LE-NEXT: add r5, r5, r3 277; P9LE-NEXT: srwi r6, r5, 31 278; P9LE-NEXT: srawi r5, r5, 6 279; P9LE-NEXT: add r5, r5, r6 280; P9LE-NEXT: mulli r5, r5, 95 281; P9LE-NEXT: sub r3, r3, r5 282; P9LE-NEXT: mtvsrd v4, r3 283; P9LE-NEXT: li r3, 6 284; P9LE-NEXT: vextuhrx r3, r3, v2 285; P9LE-NEXT: extsh r3, r3 286; P9LE-NEXT: mulhw r4, r3, r4 287; P9LE-NEXT: add r4, r4, r3 288; P9LE-NEXT: srwi r5, r4, 31 289; P9LE-NEXT: srawi r4, r4, 6 290; P9LE-NEXT: add r4, r4, r5 291; P9LE-NEXT: mulli r4, r4, 95 292; P9LE-NEXT: sub r3, r3, r4 293; P9LE-NEXT: mtvsrd v2, r3 294; P9LE-NEXT: vmrghh v2, v2, v4 295; P9LE-NEXT: vmrglw v2, v2, v3 296; P9LE-NEXT: blr 297; 298; P9BE-LABEL: fold_srem_vec_2: 299; P9BE: # %bb.0: 300; P9BE-NEXT: li r3, 6 301; P9BE-NEXT: lis r4, -21386 302; P9BE-NEXT: vextuhlx r3, r3, v2 303; P9BE-NEXT: ori r4, r4, 37253 304; P9BE-NEXT: extsh r3, r3 305; P9BE-NEXT: mulhw r5, r3, r4 306; 
P9BE-NEXT: add r5, r5, r3 307; P9BE-NEXT: srwi r6, r5, 31 308; P9BE-NEXT: srawi r5, r5, 6 309; P9BE-NEXT: add r5, r5, r6 310; P9BE-NEXT: mulli r5, r5, 95 311; P9BE-NEXT: sub r3, r3, r5 312; P9BE-NEXT: mtvsrwz v3, r3 313; P9BE-NEXT: li r3, 4 314; P9BE-NEXT: vextuhlx r3, r3, v2 315; P9BE-NEXT: extsh r3, r3 316; P9BE-NEXT: mulhw r5, r3, r4 317; P9BE-NEXT: add r5, r5, r3 318; P9BE-NEXT: srwi r6, r5, 31 319; P9BE-NEXT: srawi r5, r5, 6 320; P9BE-NEXT: add r5, r5, r6 321; P9BE-NEXT: mulli r5, r5, 95 322; P9BE-NEXT: sub r3, r3, r5 323; P9BE-NEXT: mtvsrwz v4, r3 324; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha 325; P9BE-NEXT: addi r3, r3, .LCPI1_0@toc@l 326; P9BE-NEXT: lxv v5, 0(r3) 327; P9BE-NEXT: li r3, 2 328; P9BE-NEXT: vextuhlx r3, r3, v2 329; P9BE-NEXT: extsh r3, r3 330; P9BE-NEXT: vperm v3, v4, v3, v5 331; P9BE-NEXT: mulhw r5, r3, r4 332; P9BE-NEXT: add r5, r5, r3 333; P9BE-NEXT: srwi r6, r5, 31 334; P9BE-NEXT: srawi r5, r5, 6 335; P9BE-NEXT: add r5, r5, r6 336; P9BE-NEXT: mulli r5, r5, 95 337; P9BE-NEXT: sub r3, r3, r5 338; P9BE-NEXT: mtvsrwz v4, r3 339; P9BE-NEXT: li r3, 0 340; P9BE-NEXT: vextuhlx r3, r3, v2 341; P9BE-NEXT: extsh r3, r3 342; P9BE-NEXT: mulhw r4, r3, r4 343; P9BE-NEXT: add r4, r4, r3 344; P9BE-NEXT: srwi r5, r4, 31 345; P9BE-NEXT: srawi r4, r4, 6 346; P9BE-NEXT: add r4, r4, r5 347; P9BE-NEXT: mulli r4, r4, 95 348; P9BE-NEXT: sub r3, r3, r4 349; P9BE-NEXT: mtvsrwz v2, r3 350; P9BE-NEXT: vperm v2, v2, v4, v5 351; P9BE-NEXT: vmrghw v2, v2, v3 352; P9BE-NEXT: blr 353; 354; P8LE-LABEL: fold_srem_vec_2: 355; P8LE: # %bb.0: 356; P8LE-NEXT: xxswapd vs0, v2 357; P8LE-NEXT: lis r3, -21386 358; P8LE-NEXT: ori r3, r3, 37253 359; P8LE-NEXT: mffprd r4, f0 360; P8LE-NEXT: clrldi r5, r4, 48 361; P8LE-NEXT: rldicl r6, r4, 48, 48 362; P8LE-NEXT: extsh r5, r5 363; P8LE-NEXT: rldicl r7, r4, 32, 48 364; P8LE-NEXT: extsh r6, r6 365; P8LE-NEXT: mulhw r8, r5, r3 366; P8LE-NEXT: rldicl r4, r4, 16, 48 367; P8LE-NEXT: extsh r7, r7 368; P8LE-NEXT: mulhw r9, r6, r3 369; P8LE-NEXT: 
extsh r4, r4 370; P8LE-NEXT: mulhw r10, r7, r3 371; P8LE-NEXT: mulhw r3, r4, r3 372; P8LE-NEXT: add r8, r8, r5 373; P8LE-NEXT: add r9, r9, r6 374; P8LE-NEXT: srwi r11, r8, 31 375; P8LE-NEXT: srawi r8, r8, 6 376; P8LE-NEXT: add r10, r10, r7 377; P8LE-NEXT: add r3, r3, r4 378; P8LE-NEXT: add r8, r8, r11 379; P8LE-NEXT: srwi r11, r9, 31 380; P8LE-NEXT: srawi r9, r9, 6 381; P8LE-NEXT: mulli r8, r8, 95 382; P8LE-NEXT: add r9, r9, r11 383; P8LE-NEXT: srwi r11, r10, 31 384; P8LE-NEXT: srawi r10, r10, 6 385; P8LE-NEXT: mulli r9, r9, 95 386; P8LE-NEXT: add r10, r10, r11 387; P8LE-NEXT: srwi r11, r3, 31 388; P8LE-NEXT: srawi r3, r3, 6 389; P8LE-NEXT: mulli r10, r10, 95 390; P8LE-NEXT: sub r5, r5, r8 391; P8LE-NEXT: add r3, r3, r11 392; P8LE-NEXT: mtvsrd v2, r5 393; P8LE-NEXT: mulli r3, r3, 95 394; P8LE-NEXT: sub r6, r6, r9 395; P8LE-NEXT: mtvsrd v3, r6 396; P8LE-NEXT: sub r5, r7, r10 397; P8LE-NEXT: mtvsrd v4, r5 398; P8LE-NEXT: sub r3, r4, r3 399; P8LE-NEXT: vmrghh v2, v3, v2 400; P8LE-NEXT: mtvsrd v5, r3 401; P8LE-NEXT: vmrghh v3, v5, v4 402; P8LE-NEXT: vmrglw v2, v3, v2 403; P8LE-NEXT: blr 404; 405; P8BE-LABEL: fold_srem_vec_2: 406; P8BE: # %bb.0: 407; P8BE-NEXT: mfvsrd r4, v2 408; P8BE-NEXT: lis r3, -21386 409; P8BE-NEXT: ori r3, r3, 37253 410; P8BE-NEXT: clrldi r5, r4, 48 411; P8BE-NEXT: rldicl r6, r4, 48, 48 412; P8BE-NEXT: extsh r5, r5 413; P8BE-NEXT: rldicl r7, r4, 32, 48 414; P8BE-NEXT: extsh r6, r6 415; P8BE-NEXT: mulhw r8, r5, r3 416; P8BE-NEXT: rldicl r4, r4, 16, 48 417; P8BE-NEXT: extsh r7, r7 418; P8BE-NEXT: mulhw r9, r6, r3 419; P8BE-NEXT: extsh r4, r4 420; P8BE-NEXT: mulhw r10, r7, r3 421; P8BE-NEXT: mulhw r3, r4, r3 422; P8BE-NEXT: add r8, r8, r5 423; P8BE-NEXT: add r9, r9, r6 424; P8BE-NEXT: srwi r11, r8, 31 425; P8BE-NEXT: srawi r8, r8, 6 426; P8BE-NEXT: add r10, r10, r7 427; P8BE-NEXT: add r3, r3, r4 428; P8BE-NEXT: add r8, r8, r11 429; P8BE-NEXT: srwi r11, r9, 31 430; P8BE-NEXT: srawi r9, r9, 6 431; P8BE-NEXT: mulli r8, r8, 95 432; P8BE-NEXT: add r9, r9, 
r11 433; P8BE-NEXT: srwi r11, r10, 31 434; P8BE-NEXT: srawi r10, r10, 6 435; P8BE-NEXT: mulli r9, r9, 95 436; P8BE-NEXT: add r10, r10, r11 437; P8BE-NEXT: srwi r11, r3, 31 438; P8BE-NEXT: srawi r3, r3, 6 439; P8BE-NEXT: mulli r10, r10, 95 440; P8BE-NEXT: sub r5, r5, r8 441; P8BE-NEXT: addis r8, r2, .LCPI1_0@toc@ha 442; P8BE-NEXT: add r3, r3, r11 443; P8BE-NEXT: mtvsrwz v2, r5 444; P8BE-NEXT: addi r5, r8, .LCPI1_0@toc@l 445; P8BE-NEXT: mulli r3, r3, 95 446; P8BE-NEXT: sub r6, r6, r9 447; P8BE-NEXT: lxvw4x v3, 0, r5 448; P8BE-NEXT: mtvsrwz v4, r6 449; P8BE-NEXT: sub r5, r7, r10 450; P8BE-NEXT: mtvsrwz v5, r5 451; P8BE-NEXT: sub r3, r4, r3 452; P8BE-NEXT: vperm v2, v4, v2, v3 453; P8BE-NEXT: mtvsrwz v0, r3 454; P8BE-NEXT: vperm v3, v0, v5, v3 455; P8BE-NEXT: vmrghw v2, v3, v2 456; P8BE-NEXT: blr 457 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95> 458 ret <4 x i16> %1 459} 460 461 462; Don't fold if we can combine srem with sdiv. 463define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { 464; P9LE-LABEL: combine_srem_sdiv: 465; P9LE: # %bb.0: 466; P9LE-NEXT: li r3, 0 467; P9LE-NEXT: lis r4, -21386 468; P9LE-NEXT: vextuhrx r3, r3, v2 469; P9LE-NEXT: ori r4, r4, 37253 470; P9LE-NEXT: extsh r3, r3 471; P9LE-NEXT: mulhw r5, r3, r4 472; P9LE-NEXT: add r5, r5, r3 473; P9LE-NEXT: srwi r6, r5, 31 474; P9LE-NEXT: srawi r5, r5, 6 475; P9LE-NEXT: add r5, r5, r6 476; P9LE-NEXT: mulli r6, r5, 95 477; P9LE-NEXT: sub r3, r3, r6 478; P9LE-NEXT: mtvsrd v3, r3 479; P9LE-NEXT: li r3, 2 480; P9LE-NEXT: vextuhrx r3, r3, v2 481; P9LE-NEXT: extsh r6, r3 482; P9LE-NEXT: mulhw r7, r6, r4 483; P9LE-NEXT: add r6, r7, r6 484; P9LE-NEXT: srwi r7, r6, 31 485; P9LE-NEXT: srawi r6, r6, 6 486; P9LE-NEXT: add r6, r6, r7 487; P9LE-NEXT: mulli r7, r6, 95 488; P9LE-NEXT: sub r3, r3, r7 489; P9LE-NEXT: mtvsrd v4, r3 490; P9LE-NEXT: li r3, 4 491; P9LE-NEXT: vextuhrx r3, r3, v2 492; P9LE-NEXT: vmrghh v3, v4, v3 493; P9LE-NEXT: extsh r7, r3 494; P9LE-NEXT: mulhw r8, r7, r4 495; P9LE-NEXT: add r7, r8, r7 
496; P9LE-NEXT: srwi r8, r7, 31 497; P9LE-NEXT: srawi r7, r7, 6 498; P9LE-NEXT: add r7, r7, r8 499; P9LE-NEXT: mulli r8, r7, 95 500; P9LE-NEXT: sub r3, r3, r8 501; P9LE-NEXT: mtvsrd v4, r3 502; P9LE-NEXT: li r3, 6 503; P9LE-NEXT: vextuhrx r3, r3, v2 504; P9LE-NEXT: extsh r8, r3 505; P9LE-NEXT: mulhw r4, r8, r4 506; P9LE-NEXT: add r4, r4, r8 507; P9LE-NEXT: srwi r8, r4, 31 508; P9LE-NEXT: srawi r4, r4, 6 509; P9LE-NEXT: add r4, r4, r8 510; P9LE-NEXT: mulli r8, r4, 95 511; P9LE-NEXT: mtvsrd v5, r4 512; P9LE-NEXT: sub r3, r3, r8 513; P9LE-NEXT: mtvsrd v2, r3 514; P9LE-NEXT: vmrghh v2, v2, v4 515; P9LE-NEXT: mtvsrd v4, r6 516; P9LE-NEXT: vmrglw v2, v2, v3 517; P9LE-NEXT: mtvsrd v3, r5 518; P9LE-NEXT: vmrghh v3, v4, v3 519; P9LE-NEXT: mtvsrd v4, r7 520; P9LE-NEXT: vmrghh v4, v5, v4 521; P9LE-NEXT: vmrglw v3, v4, v3 522; P9LE-NEXT: vadduhm v2, v2, v3 523; P9LE-NEXT: blr 524; 525; P9BE-LABEL: combine_srem_sdiv: 526; P9BE: # %bb.0: 527; P9BE-NEXT: li r3, 6 528; P9BE-NEXT: lis r5, -21386 529; P9BE-NEXT: vextuhlx r3, r3, v2 530; P9BE-NEXT: ori r5, r5, 37253 531; P9BE-NEXT: extsh r4, r3 532; P9BE-NEXT: mulhw r6, r4, r5 533; P9BE-NEXT: add r4, r6, r4 534; P9BE-NEXT: srwi r6, r4, 31 535; P9BE-NEXT: srawi r4, r4, 6 536; P9BE-NEXT: add r4, r4, r6 537; P9BE-NEXT: mulli r6, r4, 95 538; P9BE-NEXT: sub r3, r3, r6 539; P9BE-NEXT: mtvsrwz v3, r3 540; P9BE-NEXT: li r3, 4 541; P9BE-NEXT: vextuhlx r3, r3, v2 542; P9BE-NEXT: extsh r6, r3 543; P9BE-NEXT: mulhw r7, r6, r5 544; P9BE-NEXT: add r6, r7, r6 545; P9BE-NEXT: srwi r7, r6, 31 546; P9BE-NEXT: srawi r6, r6, 6 547; P9BE-NEXT: add r6, r6, r7 548; P9BE-NEXT: mulli r7, r6, 95 549; P9BE-NEXT: sub r3, r3, r7 550; P9BE-NEXT: mtvsrwz v4, r3 551; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha 552; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l 553; P9BE-NEXT: lxv v5, 0(r3) 554; P9BE-NEXT: li r3, 2 555; P9BE-NEXT: vextuhlx r3, r3, v2 556; P9BE-NEXT: extsh r7, r3 557; P9BE-NEXT: vperm v3, v4, v3, v5 558; P9BE-NEXT: mulhw r8, r7, r5 559; P9BE-NEXT: add r7, r8, 
r7 560; P9BE-NEXT: srwi r8, r7, 31 561; P9BE-NEXT: srawi r7, r7, 6 562; P9BE-NEXT: add r7, r7, r8 563; P9BE-NEXT: mulli r8, r7, 95 564; P9BE-NEXT: sub r3, r3, r8 565; P9BE-NEXT: mtvsrwz v4, r3 566; P9BE-NEXT: li r3, 0 567; P9BE-NEXT: vextuhlx r3, r3, v2 568; P9BE-NEXT: extsh r3, r3 569; P9BE-NEXT: mulhw r5, r3, r5 570; P9BE-NEXT: add r5, r5, r3 571; P9BE-NEXT: srwi r8, r5, 31 572; P9BE-NEXT: srawi r5, r5, 6 573; P9BE-NEXT: add r5, r5, r8 574; P9BE-NEXT: mulli r8, r5, 95 575; P9BE-NEXT: mtvsrwz v0, r5 576; P9BE-NEXT: sub r3, r3, r8 577; P9BE-NEXT: mtvsrwz v2, r3 578; P9BE-NEXT: vperm v2, v2, v4, v5 579; P9BE-NEXT: mtvsrwz v4, r6 580; P9BE-NEXT: vmrghw v2, v2, v3 581; P9BE-NEXT: mtvsrwz v3, r4 582; P9BE-NEXT: vperm v3, v4, v3, v5 583; P9BE-NEXT: mtvsrwz v4, r7 584; P9BE-NEXT: vperm v4, v0, v4, v5 585; P9BE-NEXT: vmrghw v3, v4, v3 586; P9BE-NEXT: vadduhm v2, v2, v3 587; P9BE-NEXT: blr 588; 589; P8LE-LABEL: combine_srem_sdiv: 590; P8LE: # %bb.0: 591; P8LE-NEXT: xxswapd vs0, v2 592; P8LE-NEXT: lis r3, -21386 593; P8LE-NEXT: ori r3, r3, 37253 594; P8LE-NEXT: mffprd r4, f0 595; P8LE-NEXT: clrldi r5, r4, 48 596; P8LE-NEXT: rldicl r6, r4, 48, 48 597; P8LE-NEXT: rldicl r7, r4, 32, 48 598; P8LE-NEXT: extsh r5, r5 599; P8LE-NEXT: extsh r8, r6 600; P8LE-NEXT: extsh r9, r7 601; P8LE-NEXT: mulhw r10, r5, r3 602; P8LE-NEXT: mulhw r11, r8, r3 603; P8LE-NEXT: rldicl r4, r4, 16, 48 604; P8LE-NEXT: mulhw r12, r9, r3 605; P8LE-NEXT: extsh r0, r4 606; P8LE-NEXT: mulhw r3, r0, r3 607; P8LE-NEXT: add r10, r10, r5 608; P8LE-NEXT: add r8, r11, r8 609; P8LE-NEXT: srwi r11, r10, 31 610; P8LE-NEXT: add r9, r12, r9 611; P8LE-NEXT: srawi r10, r10, 6 612; P8LE-NEXT: srawi r12, r8, 6 613; P8LE-NEXT: srwi r8, r8, 31 614; P8LE-NEXT: add r10, r10, r11 615; P8LE-NEXT: add r3, r3, r0 616; P8LE-NEXT: srawi r11, r9, 6 617; P8LE-NEXT: srwi r9, r9, 31 618; P8LE-NEXT: add r8, r12, r8 619; P8LE-NEXT: mtvsrd v2, r10 620; P8LE-NEXT: mulli r12, r10, 95 621; P8LE-NEXT: add r9, r11, r9 622; P8LE-NEXT: srwi r11, 
r3, 31 623; P8LE-NEXT: mtvsrd v3, r8 624; P8LE-NEXT: srawi r3, r3, 6 625; P8LE-NEXT: mulli r10, r8, 95 626; P8LE-NEXT: mtvsrd v4, r9 627; P8LE-NEXT: add r3, r3, r11 628; P8LE-NEXT: mulli r8, r9, 95 629; P8LE-NEXT: vmrghh v2, v3, v2 630; P8LE-NEXT: mulli r9, r3, 95 631; P8LE-NEXT: sub r5, r5, r12 632; P8LE-NEXT: sub r6, r6, r10 633; P8LE-NEXT: mtvsrd v3, r5 634; P8LE-NEXT: mtvsrd v5, r6 635; P8LE-NEXT: sub r5, r7, r8 636; P8LE-NEXT: sub r4, r4, r9 637; P8LE-NEXT: mtvsrd v0, r5 638; P8LE-NEXT: mtvsrd v1, r4 639; P8LE-NEXT: vmrghh v3, v5, v3 640; P8LE-NEXT: mtvsrd v5, r3 641; P8LE-NEXT: vmrghh v0, v1, v0 642; P8LE-NEXT: vmrghh v4, v5, v4 643; P8LE-NEXT: vmrglw v3, v0, v3 644; P8LE-NEXT: vmrglw v2, v4, v2 645; P8LE-NEXT: vadduhm v2, v3, v2 646; P8LE-NEXT: blr 647; 648; P8BE-LABEL: combine_srem_sdiv: 649; P8BE: # %bb.0: 650; P8BE-NEXT: mfvsrd r4, v2 651; P8BE-NEXT: lis r3, -21386 652; P8BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill 653; P8BE-NEXT: addis r30, r2, .LCPI2_0@toc@ha 654; P8BE-NEXT: ori r3, r3, 37253 655; P8BE-NEXT: clrldi r5, r4, 48 656; P8BE-NEXT: rldicl r6, r4, 48, 48 657; P8BE-NEXT: rldicl r7, r4, 32, 48 658; P8BE-NEXT: extsh r8, r5 659; P8BE-NEXT: extsh r9, r6 660; P8BE-NEXT: extsh r10, r7 661; P8BE-NEXT: mulhw r11, r8, r3 662; P8BE-NEXT: mulhw r12, r9, r3 663; P8BE-NEXT: rldicl r4, r4, 16, 48 664; P8BE-NEXT: mulhw r0, r10, r3 665; P8BE-NEXT: extsh r4, r4 666; P8BE-NEXT: mulhw r3, r4, r3 667; P8BE-NEXT: add r8, r11, r8 668; P8BE-NEXT: add r9, r12, r9 669; P8BE-NEXT: srwi r11, r8, 31 670; P8BE-NEXT: add r10, r0, r10 671; P8BE-NEXT: srawi r8, r8, 6 672; P8BE-NEXT: addi r0, r30, .LCPI2_0@toc@l 673; P8BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload 674; P8BE-NEXT: srawi r12, r9, 6 675; P8BE-NEXT: srwi r9, r9, 31 676; P8BE-NEXT: add r8, r8, r11 677; P8BE-NEXT: add r3, r3, r4 678; P8BE-NEXT: lxvw4x v2, 0, r0 679; P8BE-NEXT: srawi r11, r10, 6 680; P8BE-NEXT: srwi r10, r10, 31 681; P8BE-NEXT: add r9, r12, r9 682; P8BE-NEXT: mtvsrwz v3, r8 683; P8BE-NEXT: mulli 
r12, r8, 95 684; P8BE-NEXT: add r10, r11, r10 685; P8BE-NEXT: srwi r11, r3, 31 686; P8BE-NEXT: mtvsrwz v4, r9 687; P8BE-NEXT: srawi r3, r3, 6 688; P8BE-NEXT: mulli r8, r9, 95 689; P8BE-NEXT: mtvsrwz v5, r10 690; P8BE-NEXT: add r3, r3, r11 691; P8BE-NEXT: mulli r9, r10, 95 692; P8BE-NEXT: vperm v3, v4, v3, v2 693; P8BE-NEXT: mulli r10, r3, 95 694; P8BE-NEXT: sub r5, r5, r12 695; P8BE-NEXT: sub r6, r6, r8 696; P8BE-NEXT: mtvsrwz v4, r5 697; P8BE-NEXT: mtvsrwz v0, r6 698; P8BE-NEXT: sub r5, r7, r9 699; P8BE-NEXT: sub r4, r4, r10 700; P8BE-NEXT: mtvsrwz v1, r5 701; P8BE-NEXT: mtvsrwz v6, r4 702; P8BE-NEXT: vperm v4, v0, v4, v2 703; P8BE-NEXT: mtvsrwz v0, r3 704; P8BE-NEXT: vperm v1, v6, v1, v2 705; P8BE-NEXT: vperm v2, v0, v5, v2 706; P8BE-NEXT: vmrghw v4, v1, v4 707; P8BE-NEXT: vmrghw v2, v2, v3 708; P8BE-NEXT: vadduhm v2, v4, v2 709; P8BE-NEXT: blr 710 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95> 711 %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95> 712 %3 = add <4 x i16> %1, %2 713 ret <4 x i16> %3 714} 715 716; Don't fold for divisors that are a power of two. 
; dont_fold_srem_power_of_two computes srem of <4 x i16> %x by the constant
; vector <64, 32, 8, 95>.
; In the expected output the three power-of-two lanes (64, 32, 8) use a shift
; sequence (srawi / addze / slwi / sub with shift amounts 6, 5 and 3). Only
; the 95 lane uses the multiply-high sequence (lis -21386 / ori 37253, mulhw,
; add, srwi 31, srawi 6, mulli 95, sub).
; The body carries one assertion group per FileCheck prefix, in the order
; P9LE, P9BE, P8LE, P8BE.
717define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { 718; P9LE-LABEL: dont_fold_srem_power_of_two: 719; P9LE: # %bb.0: 720; P9LE-NEXT: li r3, 0 721; P9LE-NEXT: vextuhrx r3, r3, v2 722; P9LE-NEXT: extsh r3, r3 723; P9LE-NEXT: srawi r4, r3, 6 724; P9LE-NEXT: addze r4, r4 725; P9LE-NEXT: slwi r4, r4, 6 726; P9LE-NEXT: sub r3, r3, r4 727; P9LE-NEXT: mtvsrd v3, r3 728; P9LE-NEXT: li r3, 2 729; P9LE-NEXT: vextuhrx r3, r3, v2 730; P9LE-NEXT: extsh r3, r3 731; P9LE-NEXT: srawi r4, r3, 5 732; P9LE-NEXT: addze r4, r4 733; P9LE-NEXT: slwi r4, r4, 5 734; P9LE-NEXT: sub r3, r3, r4 735; P9LE-NEXT: lis r4, -21386 736; P9LE-NEXT: mtvsrd v4, r3 737; P9LE-NEXT: li r3, 6 738; P9LE-NEXT: ori r4, r4, 37253 739; P9LE-NEXT: vextuhrx r3, r3, v2 740; P9LE-NEXT: vmrghh v3, v4, v3 741; P9LE-NEXT: extsh r3, r3 742; P9LE-NEXT: mulhw r4, r3, r4 743; P9LE-NEXT: add r4, r4, r3 744; P9LE-NEXT: srwi r5, r4, 31 745; P9LE-NEXT: srawi r4, r4, 6 746; P9LE-NEXT: add r4, r4, r5 747; P9LE-NEXT: mulli r4, r4, 95 748; P9LE-NEXT: sub r3, r3, r4 749; P9LE-NEXT: mtvsrd v4, r3 750; P9LE-NEXT: li r3, 4 751; P9LE-NEXT: vextuhrx r3, r3, v2 752; P9LE-NEXT: extsh r3, r3 753; P9LE-NEXT: srawi r4, r3, 3 754; P9LE-NEXT: addze r4, r4 755; P9LE-NEXT: slwi r4, r4, 3 756; P9LE-NEXT: sub r3, r3, r4 757; P9LE-NEXT: mtvsrd v2, r3 758; P9LE-NEXT: vmrghh v2, v4, v2 759; P9LE-NEXT: vmrglw v2, v2, v3 760; P9LE-NEXT: blr 761; 762; P9BE-LABEL: dont_fold_srem_power_of_two: 763; P9BE: # %bb.0: 764; P9BE-NEXT: li r3, 2 765; P9BE-NEXT: vextuhlx r3, r3, v2 766; P9BE-NEXT: extsh r3, r3 767; P9BE-NEXT: srawi r4, r3, 5 768; P9BE-NEXT: addze r4, r4 769; P9BE-NEXT: slwi r4, r4, 5 770; P9BE-NEXT: sub r3, r3, r4 771; P9BE-NEXT: mtvsrwz v3, r3 772; P9BE-NEXT: li r3, 0 773; P9BE-NEXT: vextuhlx r3, r3, v2 774; P9BE-NEXT: extsh r3, r3 775; P9BE-NEXT: srawi r4, r3, 6 776; P9BE-NEXT: addze r4, r4 777; P9BE-NEXT: slwi r4, r4, 6 778; P9BE-NEXT: sub r3, r3, r4 779; P9BE-NEXT: lis r4, -21386 780; P9BE-NEXT: mtvsrwz v4, r3 781; P9BE-NEXT: 
addis r3, r2, .LCPI3_0@toc@ha 782; P9BE-NEXT: ori r4, r4, 37253 783; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l 784; P9BE-NEXT: lxv v5, 0(r3) 785; P9BE-NEXT: li r3, 6 786; P9BE-NEXT: vextuhlx r3, r3, v2 787; P9BE-NEXT: extsh r3, r3 788; P9BE-NEXT: vperm v3, v4, v3, v5 789; P9BE-NEXT: mulhw r4, r3, r4 790; P9BE-NEXT: add r4, r4, r3 791; P9BE-NEXT: srwi r5, r4, 31 792; P9BE-NEXT: srawi r4, r4, 6 793; P9BE-NEXT: add r4, r4, r5 794; P9BE-NEXT: mulli r4, r4, 95 795; P9BE-NEXT: sub r3, r3, r4 796; P9BE-NEXT: mtvsrwz v4, r3 797; P9BE-NEXT: li r3, 4 798; P9BE-NEXT: vextuhlx r3, r3, v2 799; P9BE-NEXT: extsh r3, r3 800; P9BE-NEXT: srawi r4, r3, 3 801; P9BE-NEXT: addze r4, r4 802; P9BE-NEXT: slwi r4, r4, 3 803; P9BE-NEXT: sub r3, r3, r4 804; P9BE-NEXT: mtvsrwz v2, r3 805; P9BE-NEXT: vperm v2, v2, v4, v5 806; P9BE-NEXT: vmrghw v2, v3, v2 807; P9BE-NEXT: blr 808; 809; P8LE-LABEL: dont_fold_srem_power_of_two: 810; P8LE: # %bb.0: 811; P8LE-NEXT: xxswapd vs0, v2 812; P8LE-NEXT: lis r3, -21386 813; P8LE-NEXT: ori r3, r3, 37253 814; P8LE-NEXT: mffprd r4, f0 815; P8LE-NEXT: rldicl r5, r4, 16, 48 816; P8LE-NEXT: clrldi r6, r4, 48 817; P8LE-NEXT: extsh r5, r5 818; P8LE-NEXT: extsh r6, r6 819; P8LE-NEXT: mulhw r3, r5, r3 820; P8LE-NEXT: rldicl r7, r4, 48, 48 821; P8LE-NEXT: srawi r8, r6, 6 822; P8LE-NEXT: extsh r7, r7 823; P8LE-NEXT: addze r8, r8 824; P8LE-NEXT: rldicl r4, r4, 32, 48 825; P8LE-NEXT: srawi r9, r7, 5 826; P8LE-NEXT: extsh r4, r4 827; P8LE-NEXT: slwi r8, r8, 6 828; P8LE-NEXT: add r3, r3, r5 829; P8LE-NEXT: addze r9, r9 830; P8LE-NEXT: sub r6, r6, r8 831; P8LE-NEXT: srwi r10, r3, 31 832; P8LE-NEXT: srawi r3, r3, 6 833; P8LE-NEXT: slwi r8, r9, 5 834; P8LE-NEXT: mtvsrd v2, r6 835; P8LE-NEXT: add r3, r3, r10 836; P8LE-NEXT: srawi r9, r4, 3 837; P8LE-NEXT: sub r6, r7, r8 838; P8LE-NEXT: mulli r3, r3, 95 839; P8LE-NEXT: addze r7, r9 840; P8LE-NEXT: mtvsrd v3, r6 841; P8LE-NEXT: vmrghh v2, v3, v2 842; P8LE-NEXT: sub r3, r5, r3 843; P8LE-NEXT: slwi r5, r7, 3 844; P8LE-NEXT: sub r4, 
r4, r5 845; P8LE-NEXT: mtvsrd v4, r3 846; P8LE-NEXT: mtvsrd v5, r4 847; P8LE-NEXT: vmrghh v3, v4, v5 848; P8LE-NEXT: vmrglw v2, v3, v2 849; P8LE-NEXT: blr 850; 851; P8BE-LABEL: dont_fold_srem_power_of_two: 852; P8BE: # %bb.0: 853; P8BE-NEXT: mfvsrd r4, v2 854; P8BE-NEXT: lis r3, -21386 855; P8BE-NEXT: ori r3, r3, 37253 856; P8BE-NEXT: clrldi r5, r4, 48 857; P8BE-NEXT: rldicl r6, r4, 32, 48 858; P8BE-NEXT: extsh r5, r5 859; P8BE-NEXT: extsh r6, r6 860; P8BE-NEXT: mulhw r3, r5, r3 861; P8BE-NEXT: rldicl r7, r4, 16, 48 862; P8BE-NEXT: srawi r8, r6, 5 863; P8BE-NEXT: extsh r7, r7 864; P8BE-NEXT: addze r8, r8 865; P8BE-NEXT: rldicl r4, r4, 48, 48 866; P8BE-NEXT: srawi r9, r7, 6 867; P8BE-NEXT: extsh r4, r4 868; P8BE-NEXT: slwi r8, r8, 5 869; P8BE-NEXT: add r3, r3, r5 870; P8BE-NEXT: addze r9, r9 871; P8BE-NEXT: sub r6, r6, r8 872; P8BE-NEXT: srwi r10, r3, 31 873; P8BE-NEXT: srawi r3, r3, 6 874; P8BE-NEXT: slwi r8, r9, 6 875; P8BE-NEXT: mtvsrwz v2, r6 876; P8BE-NEXT: add r3, r3, r10 877; P8BE-NEXT: srawi r9, r4, 3 878; P8BE-NEXT: addis r10, r2, .LCPI3_0@toc@ha 879; P8BE-NEXT: sub r6, r7, r8 880; P8BE-NEXT: mulli r3, r3, 95 881; P8BE-NEXT: addze r8, r9 882; P8BE-NEXT: addi r7, r10, .LCPI3_0@toc@l 883; P8BE-NEXT: mtvsrwz v4, r6 884; P8BE-NEXT: lxvw4x v3, 0, r7 885; P8BE-NEXT: sub r3, r5, r3 886; P8BE-NEXT: slwi r5, r8, 3 887; P8BE-NEXT: vperm v2, v4, v2, v3 888; P8BE-NEXT: sub r4, r4, r5 889; P8BE-NEXT: mtvsrwz v5, r3 890; P8BE-NEXT: mtvsrwz v0, r4 891; P8BE-NEXT: vperm v3, v0, v5, v3 892; P8BE-NEXT: vmrghw v2, v2, v3 893; P8BE-NEXT: blr 894 %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95> 895 ret <4 x i16> %1 896} 897 898; Don't fold if the divisor is one. 
; dont_fold_srem_one computes srem of <4 x i16> %x by the constant vector
; <1, 654, 23, 5423>.
; In the expected output the lane whose divisor is 1 needs no arithmetic: a
; literal 0 (li 0) is moved into a vector register for it. The other three
; lanes use the multiply-high sequence, ending in mulli 654, mulli 23 and
; mulli 5423 followed by sub.
; The body carries one assertion group per FileCheck prefix, in the order
; P9LE, P9BE, P8LE, P8BE.
899define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { 900; P9LE-LABEL: dont_fold_srem_one: 901; P9LE: # %bb.0: 902; P9LE-NEXT: li r3, 2 903; P9LE-NEXT: lis r4, -14230 904; P9LE-NEXT: vextuhrx r3, r3, v2 905; P9LE-NEXT: ori r4, r4, 30865 906; P9LE-NEXT: extsh r3, r3 907; P9LE-NEXT: mulhw r4, r3, r4 908; P9LE-NEXT: add r4, r4, r3 909; P9LE-NEXT: srwi r5, r4, 31 910; P9LE-NEXT: srawi r4, r4, 9 911; P9LE-NEXT: add r4, r4, r5 912; P9LE-NEXT: mulli r4, r4, 654 913; P9LE-NEXT: sub r3, r3, r4 914; P9LE-NEXT: lis r4, -19946 915; P9LE-NEXT: mtvsrd v3, r3 916; P9LE-NEXT: li r3, 0 917; P9LE-NEXT: ori r4, r4, 17097 918; P9LE-NEXT: mtvsrd v4, r3 919; P9LE-NEXT: li r3, 4 920; P9LE-NEXT: vextuhrx r3, r3, v2 921; P9LE-NEXT: vmrghh v3, v3, v4 922; P9LE-NEXT: extsh r3, r3 923; P9LE-NEXT: mulhw r4, r3, r4 924; P9LE-NEXT: add r4, r4, r3 925; P9LE-NEXT: srwi r5, r4, 31 926; P9LE-NEXT: srawi r4, r4, 4 927; P9LE-NEXT: add r4, r4, r5 928; P9LE-NEXT: mulli r4, r4, 23 929; P9LE-NEXT: sub r3, r3, r4 930; P9LE-NEXT: lis r4, 24749 931; P9LE-NEXT: mtvsrd v4, r3 932; P9LE-NEXT: li r3, 6 933; P9LE-NEXT: ori r4, r4, 47143 934; P9LE-NEXT: vextuhrx r3, r3, v2 935; P9LE-NEXT: extsh r3, r3 936; P9LE-NEXT: mulhw r4, r3, r4 937; P9LE-NEXT: srwi r5, r4, 31 938; P9LE-NEXT: srawi r4, r4, 11 939; P9LE-NEXT: add r4, r4, r5 940; P9LE-NEXT: mulli r4, r4, 5423 941; P9LE-NEXT: sub r3, r3, r4 942; P9LE-NEXT: mtvsrd v2, r3 943; P9LE-NEXT: vmrghh v2, v2, v4 944; P9LE-NEXT: vmrglw v2, v2, v3 945; P9LE-NEXT: blr 946; 947; P9BE-LABEL: dont_fold_srem_one: 948; P9BE: # %bb.0: 949; P9BE-NEXT: li r3, 4 950; P9BE-NEXT: lis r4, -19946 951; P9BE-NEXT: vextuhlx r3, r3, v2 952; P9BE-NEXT: ori r4, r4, 17097 953; P9BE-NEXT: extsh r3, r3 954; P9BE-NEXT: mulhw r4, r3, r4 955; P9BE-NEXT: add r4, r4, r3 956; P9BE-NEXT: srwi r5, r4, 31 957; P9BE-NEXT: srawi r4, r4, 4 958; P9BE-NEXT: add r4, r4, r5 959; P9BE-NEXT: mulli r4, r4, 23 960; P9BE-NEXT: sub r3, r3, r4 961; P9BE-NEXT: lis r4, 24749 962; P9BE-NEXT: mtvsrwz v3, r3 963; 
P9BE-NEXT: li r3, 6 964; P9BE-NEXT: ori r4, r4, 47143 965; P9BE-NEXT: vextuhlx r3, r3, v2 966; P9BE-NEXT: extsh r3, r3 967; P9BE-NEXT: mulhw r4, r3, r4 968; P9BE-NEXT: srwi r5, r4, 31 969; P9BE-NEXT: srawi r4, r4, 11 970; P9BE-NEXT: add r4, r4, r5 971; P9BE-NEXT: mulli r4, r4, 5423 972; P9BE-NEXT: sub r3, r3, r4 973; P9BE-NEXT: lis r4, -14230 974; P9BE-NEXT: mtvsrwz v4, r3 975; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha 976; P9BE-NEXT: ori r4, r4, 30865 977; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l 978; P9BE-NEXT: lxv v5, 0(r3) 979; P9BE-NEXT: li r3, 2 980; P9BE-NEXT: vextuhlx r3, r3, v2 981; P9BE-NEXT: extsh r3, r3 982; P9BE-NEXT: vperm v3, v3, v4, v5 983; P9BE-NEXT: mulhw r4, r3, r4 984; P9BE-NEXT: add r4, r4, r3 985; P9BE-NEXT: srwi r5, r4, 31 986; P9BE-NEXT: srawi r4, r4, 9 987; P9BE-NEXT: add r4, r4, r5 988; P9BE-NEXT: mulli r4, r4, 654 989; P9BE-NEXT: sub r3, r3, r4 990; P9BE-NEXT: mtvsrwz v2, r3 991; P9BE-NEXT: li r3, 0 992; P9BE-NEXT: mtvsrwz v4, r3 993; P9BE-NEXT: vperm v2, v4, v2, v5 994; P9BE-NEXT: vmrghw v2, v2, v3 995; P9BE-NEXT: blr 996; 997; P8LE-LABEL: dont_fold_srem_one: 998; P8LE: # %bb.0: 999; P8LE-NEXT: xxswapd vs0, v2 1000; P8LE-NEXT: lis r5, 24749 1001; P8LE-NEXT: lis r6, -19946 1002; P8LE-NEXT: lis r8, -14230 1003; P8LE-NEXT: ori r5, r5, 47143 1004; P8LE-NEXT: ori r6, r6, 17097 1005; P8LE-NEXT: ori r8, r8, 30865 1006; P8LE-NEXT: mffprd r3, f0 1007; P8LE-NEXT: rldicl r4, r3, 16, 48 1008; P8LE-NEXT: rldicl r7, r3, 32, 48 1009; P8LE-NEXT: rldicl r3, r3, 48, 48 1010; P8LE-NEXT: extsh r4, r4 1011; P8LE-NEXT: extsh r7, r7 1012; P8LE-NEXT: extsh r3, r3 1013; P8LE-NEXT: mulhw r5, r4, r5 1014; P8LE-NEXT: mulhw r6, r7, r6 1015; P8LE-NEXT: mulhw r8, r3, r8 1016; P8LE-NEXT: srwi r9, r5, 31 1017; P8LE-NEXT: srawi r5, r5, 11 1018; P8LE-NEXT: add r6, r6, r7 1019; P8LE-NEXT: add r8, r8, r3 1020; P8LE-NEXT: add r5, r5, r9 1021; P8LE-NEXT: srwi r9, r6, 31 1022; P8LE-NEXT: srawi r6, r6, 4 1023; P8LE-NEXT: add r6, r6, r9 1024; P8LE-NEXT: srwi r9, r8, 31 1025; 
P8LE-NEXT: srawi r8, r8, 9 1026; P8LE-NEXT: mulli r5, r5, 5423 1027; P8LE-NEXT: add r8, r8, r9 1028; P8LE-NEXT: mulli r6, r6, 23 1029; P8LE-NEXT: li r9, 0 1030; P8LE-NEXT: mulli r8, r8, 654 1031; P8LE-NEXT: mtvsrd v2, r9 1032; P8LE-NEXT: sub r4, r4, r5 1033; P8LE-NEXT: sub r5, r7, r6 1034; P8LE-NEXT: mtvsrd v3, r4 1035; P8LE-NEXT: sub r3, r3, r8 1036; P8LE-NEXT: mtvsrd v4, r5 1037; P8LE-NEXT: mtvsrd v5, r3 1038; P8LE-NEXT: vmrghh v3, v3, v4 1039; P8LE-NEXT: vmrghh v2, v5, v2 1040; P8LE-NEXT: vmrglw v2, v3, v2 1041; P8LE-NEXT: blr 1042; 1043; P8BE-LABEL: dont_fold_srem_one: 1044; P8BE: # %bb.0: 1045; P8BE-NEXT: mfvsrd r4, v2 1046; P8BE-NEXT: lis r3, 24749 1047; P8BE-NEXT: lis r7, -19946 1048; P8BE-NEXT: lis r8, -14230 1049; P8BE-NEXT: ori r3, r3, 47143 1050; P8BE-NEXT: ori r7, r7, 17097 1051; P8BE-NEXT: ori r8, r8, 30865 1052; P8BE-NEXT: clrldi r5, r4, 48 1053; P8BE-NEXT: rldicl r6, r4, 48, 48 1054; P8BE-NEXT: rldicl r4, r4, 32, 48 1055; P8BE-NEXT: extsh r5, r5 1056; P8BE-NEXT: extsh r6, r6 1057; P8BE-NEXT: extsh r4, r4 1058; P8BE-NEXT: mulhw r3, r5, r3 1059; P8BE-NEXT: mulhw r7, r6, r7 1060; P8BE-NEXT: mulhw r8, r4, r8 1061; P8BE-NEXT: srawi r9, r3, 11 1062; P8BE-NEXT: srwi r3, r3, 31 1063; P8BE-NEXT: add r7, r7, r6 1064; P8BE-NEXT: add r8, r8, r4 1065; P8BE-NEXT: add r3, r9, r3 1066; P8BE-NEXT: srwi r9, r7, 31 1067; P8BE-NEXT: srawi r7, r7, 4 1068; P8BE-NEXT: srawi r10, r8, 9 1069; P8BE-NEXT: srwi r8, r8, 31 1070; P8BE-NEXT: add r7, r7, r9 1071; P8BE-NEXT: addis r9, r2, .LCPI4_0@toc@ha 1072; P8BE-NEXT: mulli r3, r3, 5423 1073; P8BE-NEXT: add r8, r10, r8 1074; P8BE-NEXT: li r10, 0 1075; P8BE-NEXT: mulli r7, r7, 23 1076; P8BE-NEXT: mulli r8, r8, 654 1077; P8BE-NEXT: mtvsrwz v2, r10 1078; P8BE-NEXT: sub r3, r5, r3 1079; P8BE-NEXT: addi r5, r9, .LCPI4_0@toc@l 1080; P8BE-NEXT: lxvw4x v3, 0, r5 1081; P8BE-NEXT: sub r5, r6, r7 1082; P8BE-NEXT: mtvsrwz v4, r3 1083; P8BE-NEXT: sub r3, r4, r8 1084; P8BE-NEXT: mtvsrwz v5, r5 1085; P8BE-NEXT: mtvsrwz v0, r3 1086; P8BE-NEXT: 
vperm v4, v5, v4, v3 1087; P8BE-NEXT: vperm v2, v2, v0, v3 1088; P8BE-NEXT: vmrghw v2, v2, v4 1089; P8BE-NEXT: blr 1090 %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423> 1091 ret <4 x i16> %1 1092} 1093 1094; Don't fold if the divisor is 2^15. 1095define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { 1096; P9LE-LABEL: dont_fold_urem_i16_smax: 1097; P9LE: # %bb.0: 1098; P9LE-NEXT: li r3, 4 1099; P9LE-NEXT: lis r4, -19946 1100; P9LE-NEXT: vextuhrx r3, r3, v2 1101; P9LE-NEXT: ori r4, r4, 17097 1102; P9LE-NEXT: extsh r3, r3 1103; P9LE-NEXT: mulhw r4, r3, r4 1104; P9LE-NEXT: add r4, r4, r3 1105; P9LE-NEXT: srwi r5, r4, 31 1106; P9LE-NEXT: srawi r4, r4, 4 1107; P9LE-NEXT: add r4, r4, r5 1108; P9LE-NEXT: mulli r4, r4, 23 1109; P9LE-NEXT: sub r3, r3, r4 1110; P9LE-NEXT: lis r4, 24749 1111; P9LE-NEXT: mtvsrd v3, r3 1112; P9LE-NEXT: li r3, 6 1113; P9LE-NEXT: ori r4, r4, 47143 1114; P9LE-NEXT: vextuhrx r3, r3, v2 1115; P9LE-NEXT: extsh r3, r3 1116; P9LE-NEXT: mulhw r4, r3, r4 1117; P9LE-NEXT: srwi r5, r4, 31 1118; P9LE-NEXT: srawi r4, r4, 11 1119; P9LE-NEXT: add r4, r4, r5 1120; P9LE-NEXT: mulli r4, r4, 5423 1121; P9LE-NEXT: sub r3, r3, r4 1122; P9LE-NEXT: mtvsrd v4, r3 1123; P9LE-NEXT: li r3, 2 1124; P9LE-NEXT: vextuhrx r3, r3, v2 1125; P9LE-NEXT: vmrghh v3, v4, v3 1126; P9LE-NEXT: extsh r3, r3 1127; P9LE-NEXT: srawi r4, r3, 15 1128; P9LE-NEXT: addze r4, r4 1129; P9LE-NEXT: slwi r4, r4, 15 1130; P9LE-NEXT: sub r3, r3, r4 1131; P9LE-NEXT: mtvsrd v2, r3 1132; P9LE-NEXT: li r3, 0 1133; P9LE-NEXT: mtvsrd v4, r3 1134; P9LE-NEXT: vmrghh v2, v2, v4 1135; P9LE-NEXT: vmrglw v2, v3, v2 1136; P9LE-NEXT: blr 1137; 1138; P9BE-LABEL: dont_fold_urem_i16_smax: 1139; P9BE: # %bb.0: 1140; P9BE-NEXT: li r3, 4 1141; P9BE-NEXT: lis r4, -19946 1142; P9BE-NEXT: vextuhlx r3, r3, v2 1143; P9BE-NEXT: ori r4, r4, 17097 1144; P9BE-NEXT: extsh r3, r3 1145; P9BE-NEXT: mulhw r4, r3, r4 1146; P9BE-NEXT: add r4, r4, r3 1147; P9BE-NEXT: srwi r5, r4, 31 1148; P9BE-NEXT: srawi r4, r4, 4 1149; 
P9BE-NEXT: add r4, r4, r5 1150; P9BE-NEXT: mulli r4, r4, 23 1151; P9BE-NEXT: sub r3, r3, r4 1152; P9BE-NEXT: lis r4, 24749 1153; P9BE-NEXT: mtvsrwz v3, r3 1154; P9BE-NEXT: li r3, 6 1155; P9BE-NEXT: ori r4, r4, 47143 1156; P9BE-NEXT: vextuhlx r3, r3, v2 1157; P9BE-NEXT: extsh r3, r3 1158; P9BE-NEXT: mulhw r4, r3, r4 1159; P9BE-NEXT: srwi r5, r4, 31 1160; P9BE-NEXT: srawi r4, r4, 11 1161; P9BE-NEXT: add r4, r4, r5 1162; P9BE-NEXT: mulli r4, r4, 5423 1163; P9BE-NEXT: sub r3, r3, r4 1164; P9BE-NEXT: mtvsrwz v4, r3 1165; P9BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha 1166; P9BE-NEXT: addi r3, r3, .LCPI5_0@toc@l 1167; P9BE-NEXT: lxv v5, 0(r3) 1168; P9BE-NEXT: li r3, 2 1169; P9BE-NEXT: vextuhlx r3, r3, v2 1170; P9BE-NEXT: extsh r3, r3 1171; P9BE-NEXT: vperm v3, v3, v4, v5 1172; P9BE-NEXT: srawi r4, r3, 15 1173; P9BE-NEXT: addze r4, r4 1174; P9BE-NEXT: slwi r4, r4, 15 1175; P9BE-NEXT: sub r3, r3, r4 1176; P9BE-NEXT: mtvsrwz v2, r3 1177; P9BE-NEXT: li r3, 0 1178; P9BE-NEXT: mtvsrwz v4, r3 1179; P9BE-NEXT: vperm v2, v4, v2, v5 1180; P9BE-NEXT: vmrghw v2, v2, v3 1181; P9BE-NEXT: blr 1182; 1183; P8LE-LABEL: dont_fold_urem_i16_smax: 1184; P8LE: # %bb.0: 1185; P8LE-NEXT: xxswapd vs0, v2 1186; P8LE-NEXT: lis r4, 24749 1187; P8LE-NEXT: lis r5, -19946 1188; P8LE-NEXT: ori r4, r4, 47143 1189; P8LE-NEXT: ori r5, r5, 17097 1190; P8LE-NEXT: mffprd r3, f0 1191; P8LE-NEXT: rldicl r6, r3, 16, 48 1192; P8LE-NEXT: rldicl r7, r3, 32, 48 1193; P8LE-NEXT: extsh r6, r6 1194; P8LE-NEXT: extsh r7, r7 1195; P8LE-NEXT: mulhw r4, r6, r4 1196; P8LE-NEXT: mulhw r5, r7, r5 1197; P8LE-NEXT: rldicl r3, r3, 48, 48 1198; P8LE-NEXT: extsh r3, r3 1199; P8LE-NEXT: srwi r8, r4, 31 1200; P8LE-NEXT: srawi r4, r4, 11 1201; P8LE-NEXT: add r5, r5, r7 1202; P8LE-NEXT: add r4, r4, r8 1203; P8LE-NEXT: srwi r8, r5, 31 1204; P8LE-NEXT: srawi r5, r5, 4 1205; P8LE-NEXT: mulli r4, r4, 5423 1206; P8LE-NEXT: add r5, r5, r8 1207; P8LE-NEXT: srawi r9, r3, 15 1208; P8LE-NEXT: li r8, 0 1209; P8LE-NEXT: mulli r5, r5, 23 1210; 
P8LE-NEXT: mtvsrd v2, r8 1211; P8LE-NEXT: sub r4, r6, r4 1212; P8LE-NEXT: addze r6, r9 1213; P8LE-NEXT: slwi r6, r6, 15 1214; P8LE-NEXT: mtvsrd v3, r4 1215; P8LE-NEXT: sub r5, r7, r5 1216; P8LE-NEXT: sub r3, r3, r6 1217; P8LE-NEXT: mtvsrd v4, r5 1218; P8LE-NEXT: mtvsrd v5, r3 1219; P8LE-NEXT: vmrghh v3, v3, v4 1220; P8LE-NEXT: vmrghh v2, v5, v2 1221; P8LE-NEXT: vmrglw v2, v3, v2 1222; P8LE-NEXT: blr 1223; 1224; P8BE-LABEL: dont_fold_urem_i16_smax: 1225; P8BE: # %bb.0: 1226; P8BE-NEXT: mfvsrd r3, v2 1227; P8BE-NEXT: lis r4, 24749 1228; P8BE-NEXT: lis r5, -19946 1229; P8BE-NEXT: li r9, 0 1230; P8BE-NEXT: ori r4, r4, 47143 1231; P8BE-NEXT: ori r5, r5, 17097 1232; P8BE-NEXT: mtvsrwz v2, r9 1233; P8BE-NEXT: clrldi r6, r3, 48 1234; P8BE-NEXT: rldicl r7, r3, 48, 48 1235; P8BE-NEXT: extsh r6, r6 1236; P8BE-NEXT: extsh r7, r7 1237; P8BE-NEXT: mulhw r4, r6, r4 1238; P8BE-NEXT: mulhw r5, r7, r5 1239; P8BE-NEXT: rldicl r3, r3, 32, 48 1240; P8BE-NEXT: extsh r3, r3 1241; P8BE-NEXT: srwi r8, r4, 31 1242; P8BE-NEXT: srawi r4, r4, 11 1243; P8BE-NEXT: add r5, r5, r7 1244; P8BE-NEXT: add r4, r4, r8 1245; P8BE-NEXT: srwi r8, r5, 31 1246; P8BE-NEXT: srawi r5, r5, 4 1247; P8BE-NEXT: mulli r4, r4, 5423 1248; P8BE-NEXT: add r5, r5, r8 1249; P8BE-NEXT: addis r8, r2, .LCPI5_0@toc@ha 1250; P8BE-NEXT: srawi r10, r3, 15 1251; P8BE-NEXT: mulli r5, r5, 23 1252; P8BE-NEXT: sub r4, r6, r4 1253; P8BE-NEXT: addi r6, r8, .LCPI5_0@toc@l 1254; P8BE-NEXT: addze r8, r10 1255; P8BE-NEXT: lxvw4x v3, 0, r6 1256; P8BE-NEXT: slwi r6, r8, 15 1257; P8BE-NEXT: mtvsrwz v4, r4 1258; P8BE-NEXT: sub r5, r7, r5 1259; P8BE-NEXT: sub r3, r3, r6 1260; P8BE-NEXT: mtvsrwz v5, r5 1261; P8BE-NEXT: mtvsrwz v0, r3 1262; P8BE-NEXT: vperm v4, v5, v4, v3 1263; P8BE-NEXT: vperm v2, v2, v0, v3 1264; P8BE-NEXT: vmrghw v2, v2, v4 1265; P8BE-NEXT: blr 1266 %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423> 1267 ret <4 x i16> %1 1268} 1269 1270; Don't fold i64 srem. 
define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) {
; P9LE-LABEL: dont_fold_srem_i64:
; P9LE:       # %bb.0:
; P9LE-NEXT:    lis r4, 12374
; P9LE-NEXT:    mfvsrd r3, v3
; P9LE-NEXT:    ori r4, r4, 56339
; P9LE-NEXT:    rldic r4, r4, 33, 1
; P9LE-NEXT:    oris r4, r4, 58853
; P9LE-NEXT:    ori r4, r4, 6055
; P9LE-NEXT:    mulhd r4, r3, r4
; P9LE-NEXT:    rldicl r5, r4, 1, 63
; P9LE-NEXT:    sradi r4, r4, 11
; P9LE-NEXT:    add r4, r4, r5
; P9LE-NEXT:    lis r5, 5698
; P9LE-NEXT:    mulli r4, r4, 5423
; P9LE-NEXT:    ori r5, r5, 51289
; P9LE-NEXT:    rldic r5, r5, 35, 0
; P9LE-NEXT:    oris r5, r5, 22795
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    mfvsrld r4, v3
; P9LE-NEXT:    ori r5, r5, 8549
; P9LE-NEXT:    mulhd r5, r4, r5
; P9LE-NEXT:    add r5, r5, r4
; P9LE-NEXT:    rldicl r6, r5, 1, 63
; P9LE-NEXT:    sradi r5, r5, 4
; P9LE-NEXT:    add r5, r5, r6
; P9LE-NEXT:    mulli r5, r5, 23
; P9LE-NEXT:    sub r4, r4, r5
; P9LE-NEXT:    mtvsrdd v3, r3, r4
; P9LE-NEXT:    lis r4, 3206
; P9LE-NEXT:    mfvsrd r3, v2
; P9LE-NEXT:    ori r4, r4, 42889
; P9LE-NEXT:    rldic r4, r4, 35, 1
; P9LE-NEXT:    oris r4, r4, 1603
; P9LE-NEXT:    ori r4, r4, 21445
; P9LE-NEXT:    mulhd r4, r3, r4
; P9LE-NEXT:    rldicl r5, r4, 1, 63
; P9LE-NEXT:    sradi r4, r4, 8
; P9LE-NEXT:    add r4, r4, r5
; P9LE-NEXT:    mulli r4, r4, 654
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    li r4, 0
; P9LE-NEXT:    mtvsrdd v2, r3, r4
; P9LE-NEXT:    blr
;
; P9BE-LABEL: dont_fold_srem_i64:
; P9BE:       # %bb.0:
; P9BE-NEXT:    lis r4, 12374
; P9BE-NEXT:    mfvsrld r3, v3
; P9BE-NEXT:    ori r4, r4, 56339
; P9BE-NEXT:    rldic r4, r4, 33, 1
; P9BE-NEXT:    oris r4, r4, 58853
; P9BE-NEXT:    ori r4, r4, 6055
; P9BE-NEXT:    mulhd r4, r3, r4
; P9BE-NEXT:    rldicl r5, r4, 1, 63
; P9BE-NEXT:    sradi r4, r4, 11
; P9BE-NEXT:    add r4, r4, r5
; P9BE-NEXT:    lis r5, 5698
; P9BE-NEXT:    ori r5, r5, 51289
; P9BE-NEXT:    mulli r4, r4, 5423
; P9BE-NEXT:    rldic r5, r5, 35, 0
; P9BE-NEXT:    oris r5, r5, 22795
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    mfvsrd r4, v3
; P9BE-NEXT:    ori r5, r5, 8549
; P9BE-NEXT:    mulhd r5, r4, r5
; P9BE-NEXT:    add r5, r5, r4
; P9BE-NEXT:    rldicl r6, r5, 1, 63
; P9BE-NEXT:    sradi r5, r5, 4
; P9BE-NEXT:    add r5, r5, r6
; P9BE-NEXT:    mulli r5, r5, 23
; P9BE-NEXT:    sub r4, r4, r5
; P9BE-NEXT:    mtvsrdd v3, r4, r3
; P9BE-NEXT:    lis r4, 3206
; P9BE-NEXT:    mfvsrld r3, v2
; P9BE-NEXT:    ori r4, r4, 42889
; P9BE-NEXT:    rldic r4, r4, 35, 1
; P9BE-NEXT:    oris r4, r4, 1603
; P9BE-NEXT:    ori r4, r4, 21445
; P9BE-NEXT:    mulhd r4, r3, r4
; P9BE-NEXT:    rldicl r5, r4, 1, 63
; P9BE-NEXT:    sradi r4, r4, 8
; P9BE-NEXT:    add r4, r4, r5
; P9BE-NEXT:    mulli r4, r4, 654
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    mtvsrdd v2, 0, r3
; P9BE-NEXT:    blr
;
; P8LE-LABEL: dont_fold_srem_i64:
; P8LE:       # %bb.0:
; P8LE-NEXT:    lis r3, 12374
; P8LE-NEXT:    lis r4, 5698
; P8LE-NEXT:    lis r5, 3206
; P8LE-NEXT:    xxswapd vs0, v3
; P8LE-NEXT:    mfvsrd r6, v3
; P8LE-NEXT:    ori r3, r3, 56339
; P8LE-NEXT:    ori r4, r4, 51289
; P8LE-NEXT:    ori r5, r5, 42889
; P8LE-NEXT:    mfvsrd r7, v2
; P8LE-NEXT:    rldic r3, r3, 33, 1
; P8LE-NEXT:    rldic r4, r4, 35, 0
; P8LE-NEXT:    rldic r5, r5, 35, 1
; P8LE-NEXT:    oris r3, r3, 58853
; P8LE-NEXT:    oris r4, r4, 22795
; P8LE-NEXT:    mffprd r8, f0
; P8LE-NEXT:    oris r5, r5, 1603
; P8LE-NEXT:    ori r3, r3, 6055
; P8LE-NEXT:    ori r4, r4, 8549
; P8LE-NEXT:    ori r5, r5, 21445
; P8LE-NEXT:    mulhd r3, r6, r3
; P8LE-NEXT:    mulhd r5, r7, r5
; P8LE-NEXT:    mulhd r4, r8, r4
; P8LE-NEXT:    rldicl r9, r3, 1, 63
; P8LE-NEXT:    sradi r3, r3, 11
; P8LE-NEXT:    add r3, r3, r9
; P8LE-NEXT:    rldicl r9, r5, 1, 63
; P8LE-NEXT:    add r4, r4, r8
; P8LE-NEXT:    sradi r5, r5, 8
; P8LE-NEXT:    mulli r3, r3, 5423
; P8LE-NEXT:    add r5, r5, r9
; P8LE-NEXT:    rldicl r9, r4, 1, 63
; P8LE-NEXT:    sradi r4, r4, 4
; P8LE-NEXT:    mulli r5, r5, 654
; P8LE-NEXT:    add r4, r4, r9
; P8LE-NEXT:    mulli r4, r4, 23
; P8LE-NEXT:    sub r3, r6, r3
; P8LE-NEXT:    mtfprd f0, r3
; P8LE-NEXT:    sub r5, r7, r5
; P8LE-NEXT:    mtfprd f1, r5
; P8LE-NEXT:    sub r3, r8, r4
; P8LE-NEXT:    li r4, 0
; P8LE-NEXT:    mtfprd f2, r3
; P8LE-NEXT:    mtfprd f3, r4
; P8LE-NEXT:    xxmrghd v3, vs0, vs2
; P8LE-NEXT:    xxmrghd v2, vs1, vs3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: dont_fold_srem_i64:
; P8BE:       # %bb.0:
; P8BE-NEXT:    lis r4, 5698
; P8BE-NEXT:    lis r3, 12374
; P8BE-NEXT:    xxswapd vs0, v3
; P8BE-NEXT:    lis r5, 3206
; P8BE-NEXT:    xxswapd vs1, v2
; P8BE-NEXT:    ori r4, r4, 51289
; P8BE-NEXT:    ori r3, r3, 56339
; P8BE-NEXT:    ori r5, r5, 42889
; P8BE-NEXT:    mfvsrd r6, v3
; P8BE-NEXT:    rldic r4, r4, 35, 0
; P8BE-NEXT:    rldic r3, r3, 33, 1
; P8BE-NEXT:    oris r4, r4, 22795
; P8BE-NEXT:    rldic r5, r5, 35, 1
; P8BE-NEXT:    oris r3, r3, 58853
; P8BE-NEXT:    mffprd r7, f0
; P8BE-NEXT:    ori r4, r4, 8549
; P8BE-NEXT:    ori r3, r3, 6055
; P8BE-NEXT:    oris r5, r5, 1603
; P8BE-NEXT:    mffprd r8, f1
; P8BE-NEXT:    mulhd r4, r6, r4
; P8BE-NEXT:    mulhd r3, r7, r3
; P8BE-NEXT:    ori r5, r5, 21445
; P8BE-NEXT:    mulhd r5, r8, r5
; P8BE-NEXT:    add r4, r4, r6
; P8BE-NEXT:    rldicl r9, r3, 1, 63
; P8BE-NEXT:    sradi r3, r3, 11
; P8BE-NEXT:    rldicl r10, r4, 1, 63
; P8BE-NEXT:    sradi r4, r4, 4
; P8BE-NEXT:    add r3, r3, r9
; P8BE-NEXT:    rldicl r9, r5, 1, 63
; P8BE-NEXT:    add r4, r4, r10
; P8BE-NEXT:    sradi r5, r5, 8
; P8BE-NEXT:    mulli r3, r3, 5423
; P8BE-NEXT:    add r5, r5, r9
; P8BE-NEXT:    mulli r4, r4, 23
; P8BE-NEXT:    mulli r5, r5, 654
; P8BE-NEXT:    sub r3, r7, r3
; P8BE-NEXT:    sub r4, r6, r4
; P8BE-NEXT:    mtfprd f0, r3
; P8BE-NEXT:    sub r3, r8, r5
; P8BE-NEXT:    mtfprd f1, r4
; P8BE-NEXT:    li r4, 0
; P8BE-NEXT:    mtfprd f2, r3
; P8BE-NEXT:    mtfprd f3, r4
; P8BE-NEXT:    xxmrghd v3, vs1, vs0
; P8BE-NEXT:    xxmrghd v2, vs3, vs2
; P8BE-NEXT:    blr
  ; Signed remainder of each i64 lane by <1, 654, 23, 5423>. The expected
  ; output above handles the three non-unit divisors with scalar mulhd /
  ; sradi / mulli / sub sequences (mulli operands 654, 23, 5423) and supplies
  ; a constant 0 for the divisor-1 lane.
  %1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
  ret <4 x i64> %1
}
