1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ 3; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9LE 4; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ 5; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9BE 6; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ 7; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8LE 8; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ 9; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8BE 10; urem by four distinct non-power-of-two constants (95, 124, 98, 1003): for every prefix the checks expect a per-lane mulhwu/srwi/mulli/sub sequence and no divide instruction. 11define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { 12; P9LE-LABEL: fold_urem_vec_1: 13; P9LE: # %bb.0: 14; P9LE-NEXT: li r3, 4 15; P9LE-NEXT: lis r4, 21399 16; P9LE-NEXT: lis r5, 8456 17; P9LE-NEXT: vextuhrx r3, r3, v2 18; P9LE-NEXT: ori r4, r4, 33437 19; P9LE-NEXT: ori r5, r5, 16913 20; P9LE-NEXT: clrlwi r3, r3, 16 21; P9LE-NEXT: mulhwu r4, r3, r4 22; P9LE-NEXT: srwi r4, r4, 5 23; P9LE-NEXT: mulli r4, r4, 98 24; P9LE-NEXT: sub r3, r3, r4 25; P9LE-NEXT: lis r4, 16727 26; P9LE-NEXT: mtvsrd v3, r3 27; P9LE-NEXT: li r3, 6 28; P9LE-NEXT: ori r4, r4, 2287 29; P9LE-NEXT: vextuhrx r3, r3, v2 30; P9LE-NEXT: clrlwi r3, r3, 16 31; P9LE-NEXT: mulhwu r4, r3, r4 32; P9LE-NEXT: srwi r4, r4, 8 33; P9LE-NEXT: mulli r4, r4, 1003 34; P9LE-NEXT: sub r3, r3, r4 35; P9LE-NEXT: mtvsrd v4, r3 36; P9LE-NEXT: li r3, 2 37; P9LE-NEXT: vextuhrx r3, r3, v2 38; P9LE-NEXT: vmrghh v3, v4, v3 39; P9LE-NEXT: clrlwi r4, r3, 16 40; P9LE-NEXT: rlwinm r3, r3, 30, 18, 31 41; P9LE-NEXT: mulhwu r3, r3, r5 42; P9LE-NEXT: srwi r3, r3, 2 43; P9LE-NEXT: mulli r3, r3, 124 44; P9LE-NEXT: sub r3, r4, r3 45; P9LE-NEXT: lis r4, 22765 46; P9LE-NEXT: mtvsrd v4, r3 47; P9LE-NEXT: li r3, 0 48; P9LE-NEXT: ori 
r4, r4, 8969 49; P9LE-NEXT: vextuhrx r3, r3, v2 50; P9LE-NEXT: clrlwi r3, r3, 16 51; P9LE-NEXT: mulhwu r4, r3, r4 52; P9LE-NEXT: sub r5, r3, r4 53; P9LE-NEXT: srwi r5, r5, 1 54; P9LE-NEXT: add r4, r5, r4 55; P9LE-NEXT: srwi r4, r4, 6 56; P9LE-NEXT: mulli r4, r4, 95 57; P9LE-NEXT: sub r3, r3, r4 58; P9LE-NEXT: mtvsrd v2, r3 59; P9LE-NEXT: vmrghh v2, v4, v2 60; P9LE-NEXT: vmrglw v2, v3, v2 61; P9LE-NEXT: blr 62; 63; P9BE-LABEL: fold_urem_vec_1: 64; P9BE: # %bb.0: 65; P9BE-NEXT: li r3, 6 66; P9BE-NEXT: lis r4, 16727 67; P9BE-NEXT: lis r5, 8456 68; P9BE-NEXT: vextuhlx r3, r3, v2 69; P9BE-NEXT: ori r4, r4, 2287 70; P9BE-NEXT: ori r5, r5, 16913 71; P9BE-NEXT: clrlwi r3, r3, 16 72; P9BE-NEXT: mulhwu r4, r3, r4 73; P9BE-NEXT: srwi r4, r4, 8 74; P9BE-NEXT: mulli r4, r4, 1003 75; P9BE-NEXT: sub r3, r3, r4 76; P9BE-NEXT: lis r4, 21399 77; P9BE-NEXT: mtvsrwz v3, r3 78; P9BE-NEXT: li r3, 4 79; P9BE-NEXT: ori r4, r4, 33437 80; P9BE-NEXT: vextuhlx r3, r3, v2 81; P9BE-NEXT: clrlwi r3, r3, 16 82; P9BE-NEXT: mulhwu r4, r3, r4 83; P9BE-NEXT: srwi r4, r4, 5 84; P9BE-NEXT: mulli r4, r4, 98 85; P9BE-NEXT: sub r3, r3, r4 86; P9BE-NEXT: mtvsrwz v4, r3 87; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha 88; P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l 89; P9BE-NEXT: lxv v5, 0(r3) 90; P9BE-NEXT: li r3, 2 91; P9BE-NEXT: vextuhlx r3, r3, v2 92; P9BE-NEXT: clrlwi r4, r3, 16 93; P9BE-NEXT: rlwinm r3, r3, 30, 18, 31 94; P9BE-NEXT: vperm v3, v4, v3, v5 95; P9BE-NEXT: mulhwu r3, r3, r5 96; P9BE-NEXT: srwi r3, r3, 2 97; P9BE-NEXT: mulli r3, r3, 124 98; P9BE-NEXT: sub r3, r4, r3 99; P9BE-NEXT: lis r4, 22765 100; P9BE-NEXT: mtvsrwz v4, r3 101; P9BE-NEXT: li r3, 0 102; P9BE-NEXT: ori r4, r4, 8969 103; P9BE-NEXT: vextuhlx r3, r3, v2 104; P9BE-NEXT: clrlwi r3, r3, 16 105; P9BE-NEXT: mulhwu r4, r3, r4 106; P9BE-NEXT: sub r5, r3, r4 107; P9BE-NEXT: srwi r5, r5, 1 108; P9BE-NEXT: add r4, r5, r4 109; P9BE-NEXT: srwi r4, r4, 6 110; P9BE-NEXT: mulli r4, r4, 95 111; P9BE-NEXT: sub r3, r3, r4 112; P9BE-NEXT: mtvsrwz v2, r3 
113; P9BE-NEXT: vperm v2, v2, v4, v5 114; P9BE-NEXT: vmrghw v2, v2, v3 115; P9BE-NEXT: blr 116; 117; P8LE-LABEL: fold_urem_vec_1: 118; P8LE: # %bb.0: 119; P8LE-NEXT: xxswapd vs0, v2 120; P8LE-NEXT: lis r3, 22765 121; P8LE-NEXT: lis r7, 21399 122; P8LE-NEXT: lis r9, 16727 123; P8LE-NEXT: lis r10, 8456 124; P8LE-NEXT: ori r3, r3, 8969 125; P8LE-NEXT: ori r7, r7, 33437 126; P8LE-NEXT: ori r9, r9, 2287 127; P8LE-NEXT: ori r10, r10, 16913 128; P8LE-NEXT: mffprd r4, f0 129; P8LE-NEXT: clrldi r6, r4, 48 130; P8LE-NEXT: rldicl r5, r4, 32, 48 131; P8LE-NEXT: clrlwi r6, r6, 16 132; P8LE-NEXT: rldicl r8, r4, 16, 48 133; P8LE-NEXT: clrlwi r5, r5, 16 134; P8LE-NEXT: mulhwu r3, r6, r3 135; P8LE-NEXT: rldicl r4, r4, 48, 48 136; P8LE-NEXT: clrlwi r8, r8, 16 137; P8LE-NEXT: rlwinm r11, r4, 30, 18, 31 138; P8LE-NEXT: mulhwu r7, r5, r7 139; P8LE-NEXT: clrlwi r4, r4, 16 140; P8LE-NEXT: mulhwu r9, r8, r9 141; P8LE-NEXT: mulhwu r10, r11, r10 142; P8LE-NEXT: sub r11, r6, r3 143; P8LE-NEXT: srwi r11, r11, 1 144; P8LE-NEXT: srwi r7, r7, 5 145; P8LE-NEXT: add r3, r11, r3 146; P8LE-NEXT: srwi r9, r9, 8 147; P8LE-NEXT: srwi r10, r10, 2 148; P8LE-NEXT: srwi r3, r3, 6 149; P8LE-NEXT: mulli r7, r7, 98 150; P8LE-NEXT: mulli r9, r9, 1003 151; P8LE-NEXT: mulli r3, r3, 95 152; P8LE-NEXT: mulli r10, r10, 124 153; P8LE-NEXT: sub r5, r5, r7 154; P8LE-NEXT: sub r7, r8, r9 155; P8LE-NEXT: sub r3, r6, r3 156; P8LE-NEXT: mtvsrd v2, r5 157; P8LE-NEXT: sub r4, r4, r10 158; P8LE-NEXT: mtvsrd v3, r7 159; P8LE-NEXT: mtvsrd v4, r3 160; P8LE-NEXT: mtvsrd v5, r4 161; P8LE-NEXT: vmrghh v2, v3, v2 162; P8LE-NEXT: vmrghh v3, v5, v4 163; P8LE-NEXT: vmrglw v2, v2, v3 164; P8LE-NEXT: blr 165; 166; P8BE-LABEL: fold_urem_vec_1: 167; P8BE: # %bb.0: 168; P8BE-NEXT: mfvsrd r4, v2 169; P8BE-NEXT: lis r3, 22765 170; P8BE-NEXT: lis r7, 16727 171; P8BE-NEXT: lis r9, 21399 172; P8BE-NEXT: lis r10, 8456 173; P8BE-NEXT: ori r3, r3, 8969 174; P8BE-NEXT: ori r7, r7, 2287 175; P8BE-NEXT: ori r9, r9, 33437 176; P8BE-NEXT: ori r10, r10, 
16913 177; P8BE-NEXT: rldicl r6, r4, 16, 48 178; P8BE-NEXT: clrldi r5, r4, 48 179; P8BE-NEXT: clrlwi r6, r6, 16 180; P8BE-NEXT: clrlwi r5, r5, 16 181; P8BE-NEXT: mulhwu r3, r6, r3 182; P8BE-NEXT: rldicl r8, r4, 48, 48 183; P8BE-NEXT: mulhwu r7, r5, r7 184; P8BE-NEXT: rldicl r4, r4, 32, 48 185; P8BE-NEXT: clrlwi r8, r8, 16 186; P8BE-NEXT: rlwinm r11, r4, 30, 18, 31 187; P8BE-NEXT: mulhwu r9, r8, r9 188; P8BE-NEXT: clrlwi r4, r4, 16 189; P8BE-NEXT: mulhwu r10, r11, r10 190; P8BE-NEXT: sub r11, r6, r3 191; P8BE-NEXT: srwi r7, r7, 8 192; P8BE-NEXT: srwi r11, r11, 1 193; P8BE-NEXT: add r3, r11, r3 194; P8BE-NEXT: mulli r7, r7, 1003 195; P8BE-NEXT: srwi r9, r9, 5 196; P8BE-NEXT: srwi r3, r3, 6 197; P8BE-NEXT: srwi r10, r10, 2 198; P8BE-NEXT: mulli r9, r9, 98 199; P8BE-NEXT: mulli r3, r3, 95 200; P8BE-NEXT: mulli r10, r10, 124 201; P8BE-NEXT: sub r5, r5, r7 202; P8BE-NEXT: addis r7, r2, .LCPI0_0@toc@ha 203; P8BE-NEXT: mtvsrwz v2, r5 204; P8BE-NEXT: addi r5, r7, .LCPI0_0@toc@l 205; P8BE-NEXT: sub r8, r8, r9 206; P8BE-NEXT: lxvw4x v3, 0, r5 207; P8BE-NEXT: sub r3, r6, r3 208; P8BE-NEXT: sub r4, r4, r10 209; P8BE-NEXT: mtvsrwz v4, r8 210; P8BE-NEXT: mtvsrwz v5, r3 211; P8BE-NEXT: mtvsrwz v0, r4 212; P8BE-NEXT: vperm v2, v4, v2, v3 213; P8BE-NEXT: vperm v3, v5, v0, v3 214; P8BE-NEXT: vmrghw v2, v3, v2 215; P8BE-NEXT: blr 216 %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003> 217 ret <4 x i16> %1 218} 219; urem by the same constant (95) in every lane: the checks expect the magic multiplier (lis 22765 / ori 8969) to be materialized once and reused by all four mulhwu sequences. 220define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { 221; P9LE-LABEL: fold_urem_vec_2: 222; P9LE: # %bb.0: 223; P9LE-NEXT: li r3, 0 224; P9LE-NEXT: lis r4, 22765 225; P9LE-NEXT: vextuhrx r3, r3, v2 226; P9LE-NEXT: ori r4, r4, 8969 227; P9LE-NEXT: clrlwi r3, r3, 16 228; P9LE-NEXT: mulhwu r5, r3, r4 229; P9LE-NEXT: sub r6, r3, r5 230; P9LE-NEXT: srwi r6, r6, 1 231; P9LE-NEXT: add r5, r6, r5 232; P9LE-NEXT: srwi r5, r5, 6 233; P9LE-NEXT: mulli r5, r5, 95 234; P9LE-NEXT: sub r3, r3, r5 235; P9LE-NEXT: mtvsrd v3, r3 236; P9LE-NEXT: li r3, 2 237; P9LE-NEXT: vextuhrx r3, 
r3, v2 238; P9LE-NEXT: clrlwi r3, r3, 16 239; P9LE-NEXT: mulhwu r5, r3, r4 240; P9LE-NEXT: sub r6, r3, r5 241; P9LE-NEXT: srwi r6, r6, 1 242; P9LE-NEXT: add r5, r6, r5 243; P9LE-NEXT: srwi r5, r5, 6 244; P9LE-NEXT: mulli r5, r5, 95 245; P9LE-NEXT: sub r3, r3, r5 246; P9LE-NEXT: mtvsrd v4, r3 247; P9LE-NEXT: li r3, 4 248; P9LE-NEXT: vextuhrx r3, r3, v2 249; P9LE-NEXT: vmrghh v3, v4, v3 250; P9LE-NEXT: clrlwi r3, r3, 16 251; P9LE-NEXT: mulhwu r5, r3, r4 252; P9LE-NEXT: sub r6, r3, r5 253; P9LE-NEXT: srwi r6, r6, 1 254; P9LE-NEXT: add r5, r6, r5 255; P9LE-NEXT: srwi r5, r5, 6 256; P9LE-NEXT: mulli r5, r5, 95 257; P9LE-NEXT: sub r3, r3, r5 258; P9LE-NEXT: mtvsrd v4, r3 259; P9LE-NEXT: li r3, 6 260; P9LE-NEXT: vextuhrx r3, r3, v2 261; P9LE-NEXT: clrlwi r3, r3, 16 262; P9LE-NEXT: mulhwu r4, r3, r4 263; P9LE-NEXT: sub r5, r3, r4 264; P9LE-NEXT: srwi r5, r5, 1 265; P9LE-NEXT: add r4, r5, r4 266; P9LE-NEXT: srwi r4, r4, 6 267; P9LE-NEXT: mulli r4, r4, 95 268; P9LE-NEXT: sub r3, r3, r4 269; P9LE-NEXT: mtvsrd v2, r3 270; P9LE-NEXT: vmrghh v2, v2, v4 271; P9LE-NEXT: vmrglw v2, v2, v3 272; P9LE-NEXT: blr 273; 274; P9BE-LABEL: fold_urem_vec_2: 275; P9BE: # %bb.0: 276; P9BE-NEXT: li r3, 6 277; P9BE-NEXT: lis r4, 22765 278; P9BE-NEXT: vextuhlx r3, r3, v2 279; P9BE-NEXT: ori r4, r4, 8969 280; P9BE-NEXT: clrlwi r3, r3, 16 281; P9BE-NEXT: mulhwu r5, r3, r4 282; P9BE-NEXT: sub r6, r3, r5 283; P9BE-NEXT: srwi r6, r6, 1 284; P9BE-NEXT: add r5, r6, r5 285; P9BE-NEXT: srwi r5, r5, 6 286; P9BE-NEXT: mulli r5, r5, 95 287; P9BE-NEXT: sub r3, r3, r5 288; P9BE-NEXT: mtvsrwz v3, r3 289; P9BE-NEXT: li r3, 4 290; P9BE-NEXT: vextuhlx r3, r3, v2 291; P9BE-NEXT: clrlwi r3, r3, 16 292; P9BE-NEXT: mulhwu r5, r3, r4 293; P9BE-NEXT: sub r6, r3, r5 294; P9BE-NEXT: srwi r6, r6, 1 295; P9BE-NEXT: add r5, r6, r5 296; P9BE-NEXT: srwi r5, r5, 6 297; P9BE-NEXT: mulli r5, r5, 95 298; P9BE-NEXT: sub r3, r3, r5 299; P9BE-NEXT: mtvsrwz v4, r3 300; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha 301; P9BE-NEXT: addi r3, 
r3, .LCPI1_0@toc@l 302; P9BE-NEXT: lxv v5, 0(r3) 303; P9BE-NEXT: li r3, 2 304; P9BE-NEXT: vextuhlx r3, r3, v2 305; P9BE-NEXT: clrlwi r3, r3, 16 306; P9BE-NEXT: vperm v3, v4, v3, v5 307; P9BE-NEXT: mulhwu r5, r3, r4 308; P9BE-NEXT: sub r6, r3, r5 309; P9BE-NEXT: srwi r6, r6, 1 310; P9BE-NEXT: add r5, r6, r5 311; P9BE-NEXT: srwi r5, r5, 6 312; P9BE-NEXT: mulli r5, r5, 95 313; P9BE-NEXT: sub r3, r3, r5 314; P9BE-NEXT: mtvsrwz v4, r3 315; P9BE-NEXT: li r3, 0 316; P9BE-NEXT: vextuhlx r3, r3, v2 317; P9BE-NEXT: clrlwi r3, r3, 16 318; P9BE-NEXT: mulhwu r4, r3, r4 319; P9BE-NEXT: sub r5, r3, r4 320; P9BE-NEXT: srwi r5, r5, 1 321; P9BE-NEXT: add r4, r5, r4 322; P9BE-NEXT: srwi r4, r4, 6 323; P9BE-NEXT: mulli r4, r4, 95 324; P9BE-NEXT: sub r3, r3, r4 325; P9BE-NEXT: mtvsrwz v2, r3 326; P9BE-NEXT: vperm v2, v2, v4, v5 327; P9BE-NEXT: vmrghw v2, v2, v3 328; P9BE-NEXT: blr 329; 330; P8LE-LABEL: fold_urem_vec_2: 331; P8LE: # %bb.0: 332; P8LE-NEXT: xxswapd vs0, v2 333; P8LE-NEXT: lis r3, 22765 334; P8LE-NEXT: ori r3, r3, 8969 335; P8LE-NEXT: mffprd r4, f0 336; P8LE-NEXT: clrldi r5, r4, 48 337; P8LE-NEXT: rldicl r6, r4, 48, 48 338; P8LE-NEXT: clrlwi r5, r5, 16 339; P8LE-NEXT: rldicl r7, r4, 32, 48 340; P8LE-NEXT: clrlwi r6, r6, 16 341; P8LE-NEXT: mulhwu r8, r5, r3 342; P8LE-NEXT: rldicl r4, r4, 16, 48 343; P8LE-NEXT: clrlwi r7, r7, 16 344; P8LE-NEXT: mulhwu r9, r6, r3 345; P8LE-NEXT: clrlwi r4, r4, 16 346; P8LE-NEXT: mulhwu r10, r7, r3 347; P8LE-NEXT: mulhwu r3, r4, r3 348; P8LE-NEXT: sub r11, r5, r8 349; P8LE-NEXT: sub r12, r6, r9 350; P8LE-NEXT: srwi r11, r11, 1 351; P8LE-NEXT: add r8, r11, r8 352; P8LE-NEXT: sub r11, r7, r10 353; P8LE-NEXT: srwi r12, r12, 1 354; P8LE-NEXT: add r9, r12, r9 355; P8LE-NEXT: sub r12, r4, r3 356; P8LE-NEXT: srwi r11, r11, 1 357; P8LE-NEXT: srwi r8, r8, 6 358; P8LE-NEXT: add r10, r11, r10 359; P8LE-NEXT: srwi r11, r12, 1 360; P8LE-NEXT: srwi r9, r9, 6 361; P8LE-NEXT: add r3, r11, r3 362; P8LE-NEXT: mulli r8, r8, 95 363; P8LE-NEXT: srwi r10, r10, 6 
364; P8LE-NEXT: srwi r3, r3, 6 365; P8LE-NEXT: mulli r9, r9, 95 366; P8LE-NEXT: mulli r10, r10, 95 367; P8LE-NEXT: mulli r3, r3, 95 368; P8LE-NEXT: sub r5, r5, r8 369; P8LE-NEXT: sub r6, r6, r9 370; P8LE-NEXT: mtvsrd v2, r5 371; P8LE-NEXT: sub r5, r7, r10 372; P8LE-NEXT: sub r3, r4, r3 373; P8LE-NEXT: mtvsrd v3, r6 374; P8LE-NEXT: mtvsrd v4, r5 375; P8LE-NEXT: mtvsrd v5, r3 376; P8LE-NEXT: vmrghh v2, v3, v2 377; P8LE-NEXT: vmrghh v3, v5, v4 378; P8LE-NEXT: vmrglw v2, v3, v2 379; P8LE-NEXT: blr 380; 381; P8BE-LABEL: fold_urem_vec_2: 382; P8BE: # %bb.0: 383; P8BE-NEXT: mfvsrd r4, v2 384; P8BE-NEXT: lis r3, 22765 385; P8BE-NEXT: ori r3, r3, 8969 386; P8BE-NEXT: clrldi r5, r4, 48 387; P8BE-NEXT: rldicl r6, r4, 48, 48 388; P8BE-NEXT: clrlwi r5, r5, 16 389; P8BE-NEXT: rldicl r7, r4, 32, 48 390; P8BE-NEXT: clrlwi r6, r6, 16 391; P8BE-NEXT: mulhwu r8, r5, r3 392; P8BE-NEXT: rldicl r4, r4, 16, 48 393; P8BE-NEXT: clrlwi r7, r7, 16 394; P8BE-NEXT: mulhwu r9, r6, r3 395; P8BE-NEXT: clrlwi r4, r4, 16 396; P8BE-NEXT: mulhwu r10, r7, r3 397; P8BE-NEXT: mulhwu r3, r4, r3 398; P8BE-NEXT: sub r11, r5, r8 399; P8BE-NEXT: sub r12, r6, r9 400; P8BE-NEXT: srwi r11, r11, 1 401; P8BE-NEXT: add r8, r11, r8 402; P8BE-NEXT: sub r11, r7, r10 403; P8BE-NEXT: srwi r12, r12, 1 404; P8BE-NEXT: add r9, r12, r9 405; P8BE-NEXT: sub r12, r4, r3 406; P8BE-NEXT: srwi r11, r11, 1 407; P8BE-NEXT: srwi r8, r8, 6 408; P8BE-NEXT: add r10, r11, r10 409; P8BE-NEXT: srwi r11, r12, 1 410; P8BE-NEXT: srwi r9, r9, 6 411; P8BE-NEXT: mulli r8, r8, 95 412; P8BE-NEXT: add r3, r11, r3 413; P8BE-NEXT: srwi r10, r10, 6 414; P8BE-NEXT: srwi r3, r3, 6 415; P8BE-NEXT: mulli r9, r9, 95 416; P8BE-NEXT: mulli r10, r10, 95 417; P8BE-NEXT: mulli r3, r3, 95 418; P8BE-NEXT: sub r5, r5, r8 419; P8BE-NEXT: addis r8, r2, .LCPI1_0@toc@ha 420; P8BE-NEXT: mtvsrwz v2, r5 421; P8BE-NEXT: addi r5, r8, .LCPI1_0@toc@l 422; P8BE-NEXT: sub r6, r6, r9 423; P8BE-NEXT: lxvw4x v3, 0, r5 424; P8BE-NEXT: sub r5, r7, r10 425; P8BE-NEXT: sub r3, r4, 
r3 426; P8BE-NEXT: mtvsrwz v4, r6 427; P8BE-NEXT: mtvsrwz v5, r5 428; P8BE-NEXT: mtvsrwz v0, r3 429; P8BE-NEXT: vperm v2, v4, v2, v3 430; P8BE-NEXT: vperm v3, v0, v5, v3 431; P8BE-NEXT: vmrghw v2, v3, v2 432; P8BE-NEXT: blr 433 %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95> 434 ret <4 x i16> %1 435} 436 437 438; Don't fold if we can combine urem with udiv. The checks expect one mulhwu per lane: the udiv quotient is kept in a register, multiplied by 95 and subtracted to form the remainder, and is also inserted into the quotient vector. 439define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { 440; P9LE-LABEL: combine_urem_udiv: 441; P9LE: # %bb.0: 442; P9LE-NEXT: li r3, 0 443; P9LE-NEXT: lis r4, 22765 444; P9LE-NEXT: vextuhrx r3, r3, v2 445; P9LE-NEXT: ori r4, r4, 8969 446; P9LE-NEXT: clrlwi r3, r3, 16 447; P9LE-NEXT: mulhwu r5, r3, r4 448; P9LE-NEXT: sub r6, r3, r5 449; P9LE-NEXT: srwi r6, r6, 1 450; P9LE-NEXT: add r5, r6, r5 451; P9LE-NEXT: srwi r5, r5, 6 452; P9LE-NEXT: mulli r6, r5, 95 453; P9LE-NEXT: sub r3, r3, r6 454; P9LE-NEXT: mtvsrd v3, r3 455; P9LE-NEXT: li r3, 2 456; P9LE-NEXT: vextuhrx r3, r3, v2 457; P9LE-NEXT: clrlwi r6, r3, 16 458; P9LE-NEXT: mulhwu r7, r6, r4 459; P9LE-NEXT: sub r6, r6, r7 460; P9LE-NEXT: srwi r6, r6, 1 461; P9LE-NEXT: add r6, r6, r7 462; P9LE-NEXT: srwi r6, r6, 6 463; P9LE-NEXT: mulli r7, r6, 95 464; P9LE-NEXT: sub r3, r3, r7 465; P9LE-NEXT: mtvsrd v4, r3 466; P9LE-NEXT: li r3, 4 467; P9LE-NEXT: vextuhrx r3, r3, v2 468; P9LE-NEXT: vmrghh v3, v4, v3 469; P9LE-NEXT: clrlwi r7, r3, 16 470; P9LE-NEXT: mulhwu r8, r7, r4 471; P9LE-NEXT: sub r7, r7, r8 472; P9LE-NEXT: srwi r7, r7, 1 473; P9LE-NEXT: add r7, r7, r8 474; P9LE-NEXT: srwi r7, r7, 6 475; P9LE-NEXT: mulli r8, r7, 95 476; P9LE-NEXT: sub r3, r3, r8 477; P9LE-NEXT: mtvsrd v4, r3 478; P9LE-NEXT: li r3, 6 479; P9LE-NEXT: vextuhrx r3, r3, v2 480; P9LE-NEXT: clrlwi r8, r3, 16 481; P9LE-NEXT: mulhwu r4, r8, r4 482; P9LE-NEXT: sub r8, r8, r4 483; P9LE-NEXT: srwi r8, r8, 1 484; P9LE-NEXT: add r4, r8, r4 485; P9LE-NEXT: srwi r4, r4, 6 486; P9LE-NEXT: mulli r8, r4, 95 487; P9LE-NEXT: mtvsrd v5, r4 488; P9LE-NEXT: sub r3, r3, r8 489; P9LE-NEXT: mtvsrd v2, r3 490; 
P9LE-NEXT: vmrghh v2, v2, v4 491; P9LE-NEXT: mtvsrd v4, r6 492; P9LE-NEXT: vmrglw v2, v2, v3 493; P9LE-NEXT: mtvsrd v3, r5 494; P9LE-NEXT: vmrghh v3, v4, v3 495; P9LE-NEXT: mtvsrd v4, r7 496; P9LE-NEXT: vmrghh v4, v5, v4 497; P9LE-NEXT: vmrglw v3, v4, v3 498; P9LE-NEXT: vadduhm v2, v2, v3 499; P9LE-NEXT: blr 500; 501; P9BE-LABEL: combine_urem_udiv: 502; P9BE: # %bb.0: 503; P9BE-NEXT: li r3, 6 504; P9BE-NEXT: lis r5, 22765 505; P9BE-NEXT: vextuhlx r3, r3, v2 506; P9BE-NEXT: ori r5, r5, 8969 507; P9BE-NEXT: clrlwi r4, r3, 16 508; P9BE-NEXT: mulhwu r6, r4, r5 509; P9BE-NEXT: sub r4, r4, r6 510; P9BE-NEXT: srwi r4, r4, 1 511; P9BE-NEXT: add r4, r4, r6 512; P9BE-NEXT: srwi r4, r4, 6 513; P9BE-NEXT: mulli r6, r4, 95 514; P9BE-NEXT: sub r3, r3, r6 515; P9BE-NEXT: mtvsrwz v3, r3 516; P9BE-NEXT: li r3, 4 517; P9BE-NEXT: vextuhlx r3, r3, v2 518; P9BE-NEXT: clrlwi r6, r3, 16 519; P9BE-NEXT: mulhwu r7, r6, r5 520; P9BE-NEXT: sub r6, r6, r7 521; P9BE-NEXT: srwi r6, r6, 1 522; P9BE-NEXT: add r6, r6, r7 523; P9BE-NEXT: srwi r6, r6, 6 524; P9BE-NEXT: mulli r7, r6, 95 525; P9BE-NEXT: sub r3, r3, r7 526; P9BE-NEXT: mtvsrwz v4, r3 527; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha 528; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l 529; P9BE-NEXT: lxv v5, 0(r3) 530; P9BE-NEXT: li r3, 2 531; P9BE-NEXT: vextuhlx r3, r3, v2 532; P9BE-NEXT: clrlwi r7, r3, 16 533; P9BE-NEXT: vperm v3, v4, v3, v5 534; P9BE-NEXT: mulhwu r8, r7, r5 535; P9BE-NEXT: sub r7, r7, r8 536; P9BE-NEXT: srwi r7, r7, 1 537; P9BE-NEXT: add r7, r7, r8 538; P9BE-NEXT: srwi r7, r7, 6 539; P9BE-NEXT: mulli r8, r7, 95 540; P9BE-NEXT: sub r3, r3, r8 541; P9BE-NEXT: mtvsrwz v4, r3 542; P9BE-NEXT: li r3, 0 543; P9BE-NEXT: vextuhlx r3, r3, v2 544; P9BE-NEXT: clrlwi r3, r3, 16 545; P9BE-NEXT: mulhwu r5, r3, r5 546; P9BE-NEXT: sub r8, r3, r5 547; P9BE-NEXT: srwi r8, r8, 1 548; P9BE-NEXT: add r5, r8, r5 549; P9BE-NEXT: srwi r5, r5, 6 550; P9BE-NEXT: mulli r8, r5, 95 551; P9BE-NEXT: mtvsrwz v0, r5 552; P9BE-NEXT: sub r3, r3, r8 553; P9BE-NEXT: 
mtvsrwz v2, r3 554; P9BE-NEXT: vperm v2, v2, v4, v5 555; P9BE-NEXT: mtvsrwz v4, r6 556; P9BE-NEXT: vmrghw v2, v2, v3 557; P9BE-NEXT: mtvsrwz v3, r4 558; P9BE-NEXT: vperm v3, v4, v3, v5 559; P9BE-NEXT: mtvsrwz v4, r7 560; P9BE-NEXT: vperm v4, v0, v4, v5 561; P9BE-NEXT: vmrghw v3, v4, v3 562; P9BE-NEXT: vadduhm v2, v2, v3 563; P9BE-NEXT: blr 564; 565; P8LE-LABEL: combine_urem_udiv: 566; P8LE: # %bb.0: 567; P8LE-NEXT: xxswapd vs0, v2 568; P8LE-NEXT: lis r3, 22765 569; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill 570; P8LE-NEXT: ori r3, r3, 8969 571; P8LE-NEXT: mffprd r4, f0 572; P8LE-NEXT: clrldi r5, r4, 48 573; P8LE-NEXT: rldicl r6, r4, 48, 48 574; P8LE-NEXT: clrlwi r5, r5, 16 575; P8LE-NEXT: clrlwi r8, r6, 16 576; P8LE-NEXT: rldicl r7, r4, 32, 48 577; P8LE-NEXT: rldicl r4, r4, 16, 48 578; P8LE-NEXT: mulhwu r9, r5, r3 579; P8LE-NEXT: mulhwu r11, r8, r3 580; P8LE-NEXT: clrlwi r10, r7, 16 581; P8LE-NEXT: clrlwi r12, r4, 16 582; P8LE-NEXT: mulhwu r0, r10, r3 583; P8LE-NEXT: mulhwu r3, r12, r3 584; P8LE-NEXT: sub r30, r5, r9 585; P8LE-NEXT: sub r8, r8, r11 586; P8LE-NEXT: srwi r30, r30, 1 587; P8LE-NEXT: srwi r8, r8, 1 588; P8LE-NEXT: sub r10, r10, r0 589; P8LE-NEXT: add r9, r30, r9 590; P8LE-NEXT: add r8, r8, r11 591; P8LE-NEXT: sub r11, r12, r3 592; P8LE-NEXT: srwi r10, r10, 1 593; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload 594; P8LE-NEXT: srwi r9, r9, 6 595; P8LE-NEXT: srwi r11, r11, 1 596; P8LE-NEXT: srwi r8, r8, 6 597; P8LE-NEXT: add r10, r10, r0 598; P8LE-NEXT: mulli r12, r9, 95 599; P8LE-NEXT: add r3, r11, r3 600; P8LE-NEXT: mtvsrd v2, r9 601; P8LE-NEXT: srwi r10, r10, 6 602; P8LE-NEXT: mulli r9, r8, 95 603; P8LE-NEXT: srwi r3, r3, 6 604; P8LE-NEXT: mtvsrd v3, r8 605; P8LE-NEXT: mulli r8, r10, 95 606; P8LE-NEXT: mtvsrd v4, r10 607; P8LE-NEXT: mulli r10, r3, 95 608; P8LE-NEXT: vmrghh v2, v3, v2 609; P8LE-NEXT: sub r5, r5, r12 610; P8LE-NEXT: sub r6, r6, r9 611; P8LE-NEXT: mtvsrd v3, r5 612; P8LE-NEXT: mtvsrd v5, r6 613; P8LE-NEXT: sub r5, r7, r8 614; 
P8LE-NEXT: sub r4, r4, r10 615; P8LE-NEXT: mtvsrd v0, r5 616; P8LE-NEXT: mtvsrd v1, r4 617; P8LE-NEXT: vmrghh v3, v5, v3 618; P8LE-NEXT: mtvsrd v5, r3 619; P8LE-NEXT: vmrghh v0, v1, v0 620; P8LE-NEXT: vmrghh v4, v5, v4 621; P8LE-NEXT: vmrglw v3, v0, v3 622; P8LE-NEXT: vmrglw v2, v4, v2 623; P8LE-NEXT: vadduhm v2, v3, v2 624; P8LE-NEXT: blr 625; 626; P8BE-LABEL: combine_urem_udiv: 627; P8BE: # %bb.0: 628; P8BE-NEXT: mfvsrd r4, v2 629; P8BE-NEXT: lis r3, 22765 630; P8BE-NEXT: ori r3, r3, 8969 631; P8BE-NEXT: clrldi r5, r4, 48 632; P8BE-NEXT: rldicl r6, r4, 48, 48 633; P8BE-NEXT: clrlwi r8, r5, 16 634; P8BE-NEXT: clrlwi r9, r6, 16 635; P8BE-NEXT: rldicl r7, r4, 32, 48 636; P8BE-NEXT: rldicl r4, r4, 16, 48 637; P8BE-NEXT: mulhwu r10, r8, r3 638; P8BE-NEXT: mulhwu r12, r9, r3 639; P8BE-NEXT: clrlwi r11, r7, 16 640; P8BE-NEXT: clrlwi r4, r4, 16 641; P8BE-NEXT: mulhwu r0, r11, r3 642; P8BE-NEXT: mulhwu r3, r4, r3 643; P8BE-NEXT: sub r8, r8, r10 644; P8BE-NEXT: sub r9, r9, r12 645; P8BE-NEXT: srwi r8, r8, 1 646; P8BE-NEXT: srwi r9, r9, 1 647; P8BE-NEXT: sub r11, r11, r0 648; P8BE-NEXT: add r8, r8, r10 649; P8BE-NEXT: add r9, r9, r12 650; P8BE-NEXT: sub r12, r4, r3 651; P8BE-NEXT: addis r10, r2, .LCPI2_0@toc@ha 652; P8BE-NEXT: srwi r11, r11, 1 653; P8BE-NEXT: srwi r8, r8, 6 654; P8BE-NEXT: srwi r12, r12, 1 655; P8BE-NEXT: srwi r9, r9, 6 656; P8BE-NEXT: addi r10, r10, .LCPI2_0@toc@l 657; P8BE-NEXT: add r11, r11, r0 658; P8BE-NEXT: mulli r0, r8, 95 659; P8BE-NEXT: add r3, r12, r3 660; P8BE-NEXT: mtvsrwz v3, r8 661; P8BE-NEXT: lxvw4x v2, 0, r10 662; P8BE-NEXT: srwi r10, r11, 6 663; P8BE-NEXT: mulli r8, r9, 95 664; P8BE-NEXT: srwi r3, r3, 6 665; P8BE-NEXT: mtvsrwz v4, r9 666; P8BE-NEXT: mulli r9, r10, 95 667; P8BE-NEXT: mtvsrwz v5, r10 668; P8BE-NEXT: mulli r10, r3, 95 669; P8BE-NEXT: vperm v3, v4, v3, v2 670; P8BE-NEXT: sub r5, r5, r0 671; P8BE-NEXT: sub r6, r6, r8 672; P8BE-NEXT: mtvsrwz v4, r5 673; P8BE-NEXT: mtvsrwz v0, r6 674; P8BE-NEXT: sub r5, r7, r9 675; P8BE-NEXT: sub 
r4, r4, r10 676; P8BE-NEXT: mtvsrwz v1, r5 677; P8BE-NEXT: mtvsrwz v6, r4 678; P8BE-NEXT: vperm v4, v0, v4, v2 679; P8BE-NEXT: mtvsrwz v0, r3 680; P8BE-NEXT: vperm v1, v6, v1, v2 681; P8BE-NEXT: vperm v2, v0, v5, v2 682; P8BE-NEXT: vmrghw v4, v1, v4 683; P8BE-NEXT: vmrghw v2, v2, v3 684; P8BE-NEXT: vadduhm v2, v4, v2 685; P8BE-NEXT: blr 686 %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95> 687 %2 = udiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95> 688 %3 = add <4 x i16> %1, %2 689 ret <4 x i16> %3 690} 691 692; Don't fold for divisors that are a power of two. The lanes with divisors 64, 32 and 8 are expected to be reduced with a single clrlwi mask (26, 27 and 29 respectively); only the lane with divisor 95 uses the mulhwu sequence. 693define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { 694; P9LE-LABEL: dont_fold_urem_power_of_two: 695; P9LE: # %bb.0: 696; P9LE-NEXT: li r3, 0 697; P9LE-NEXT: lis r4, 22765 698; P9LE-NEXT: vextuhrx r3, r3, v2 699; P9LE-NEXT: ori r4, r4, 8969 700; P9LE-NEXT: clrlwi r3, r3, 26 701; P9LE-NEXT: mtvsrd v3, r3 702; P9LE-NEXT: li r3, 2 703; P9LE-NEXT: vextuhrx r3, r3, v2 704; P9LE-NEXT: clrlwi r3, r3, 27 705; P9LE-NEXT: mtvsrd v4, r3 706; P9LE-NEXT: li r3, 6 707; P9LE-NEXT: vextuhrx r3, r3, v2 708; P9LE-NEXT: vmrghh v3, v4, v3 709; P9LE-NEXT: clrlwi r3, r3, 16 710; P9LE-NEXT: mulhwu r4, r3, r4 711; P9LE-NEXT: sub r5, r3, r4 712; P9LE-NEXT: srwi r5, r5, 1 713; P9LE-NEXT: add r4, r5, r4 714; P9LE-NEXT: srwi r4, r4, 6 715; P9LE-NEXT: mulli r4, r4, 95 716; P9LE-NEXT: sub r3, r3, r4 717; P9LE-NEXT: mtvsrd v4, r3 718; P9LE-NEXT: li r3, 4 719; P9LE-NEXT: vextuhrx r3, r3, v2 720; P9LE-NEXT: clrlwi r3, r3, 29 721; P9LE-NEXT: mtvsrd v2, r3 722; P9LE-NEXT: vmrghh v2, v4, v2 723; P9LE-NEXT: vmrglw v2, v2, v3 724; P9LE-NEXT: blr 725; 726; P9BE-LABEL: dont_fold_urem_power_of_two: 727; P9BE: # %bb.0: 728; P9BE-NEXT: li r3, 2 729; P9BE-NEXT: lis r4, 22765 730; P9BE-NEXT: vextuhlx r3, r3, v2 731; P9BE-NEXT: ori r4, r4, 8969 732; P9BE-NEXT: clrlwi r3, r3, 27 733; P9BE-NEXT: mtvsrwz v3, r3 734; P9BE-NEXT: li r3, 0 735; P9BE-NEXT: vextuhlx r3, r3, v2 736; P9BE-NEXT: clrlwi r3, r3, 26 737; P9BE-NEXT: mtvsrwz v4, r3 
738; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha 739; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l 740; P9BE-NEXT: lxv v5, 0(r3) 741; P9BE-NEXT: li r3, 6 742; P9BE-NEXT: vextuhlx r3, r3, v2 743; P9BE-NEXT: clrlwi r3, r3, 16 744; P9BE-NEXT: vperm v3, v4, v3, v5 745; P9BE-NEXT: mulhwu r4, r3, r4 746; P9BE-NEXT: sub r5, r3, r4 747; P9BE-NEXT: srwi r5, r5, 1 748; P9BE-NEXT: add r4, r5, r4 749; P9BE-NEXT: srwi r4, r4, 6 750; P9BE-NEXT: mulli r4, r4, 95 751; P9BE-NEXT: sub r3, r3, r4 752; P9BE-NEXT: mtvsrwz v4, r3 753; P9BE-NEXT: li r3, 4 754; P9BE-NEXT: vextuhlx r3, r3, v2 755; P9BE-NEXT: clrlwi r3, r3, 29 756; P9BE-NEXT: mtvsrwz v2, r3 757; P9BE-NEXT: vperm v2, v2, v4, v5 758; P9BE-NEXT: vmrghw v2, v3, v2 759; P9BE-NEXT: blr 760; 761; P8LE-LABEL: dont_fold_urem_power_of_two: 762; P8LE: # %bb.0: 763; P8LE-NEXT: xxswapd vs0, v2 764; P8LE-NEXT: lis r3, 22765 765; P8LE-NEXT: ori r3, r3, 8969 766; P8LE-NEXT: mffprd r4, f0 767; P8LE-NEXT: rldicl r5, r4, 16, 48 768; P8LE-NEXT: rldicl r7, r4, 48, 48 769; P8LE-NEXT: clrlwi r5, r5, 16 770; P8LE-NEXT: mulhwu r3, r5, r3 771; P8LE-NEXT: sub r6, r5, r3 772; P8LE-NEXT: srwi r6, r6, 1 773; P8LE-NEXT: add r3, r6, r3 774; P8LE-NEXT: clrldi r6, r4, 48 775; P8LE-NEXT: srwi r3, r3, 6 776; P8LE-NEXT: clrlwi r6, r6, 26 777; P8LE-NEXT: mulli r3, r3, 95 778; P8LE-NEXT: rldicl r4, r4, 32, 48 779; P8LE-NEXT: mtvsrd v2, r6 780; P8LE-NEXT: clrlwi r6, r7, 27 781; P8LE-NEXT: clrlwi r4, r4, 29 782; P8LE-NEXT: mtvsrd v3, r6 783; P8LE-NEXT: mtvsrd v5, r4 784; P8LE-NEXT: vmrghh v2, v3, v2 785; P8LE-NEXT: sub r3, r5, r3 786; P8LE-NEXT: mtvsrd v4, r3 787; P8LE-NEXT: vmrghh v3, v4, v5 788; P8LE-NEXT: vmrglw v2, v3, v2 789; P8LE-NEXT: blr 790; 791; P8BE-LABEL: dont_fold_urem_power_of_two: 792; P8BE: # %bb.0: 793; P8BE-NEXT: mfvsrd r4, v2 794; P8BE-NEXT: lis r3, 22765 795; P8BE-NEXT: addis r7, r2, .LCPI3_0@toc@ha 796; P8BE-NEXT: ori r3, r3, 8969 797; P8BE-NEXT: clrldi r5, r4, 48 798; P8BE-NEXT: rldicl r8, r4, 16, 48 799; P8BE-NEXT: clrlwi r5, r5, 16 800; P8BE-NEXT: 
mulhwu r3, r5, r3 801; P8BE-NEXT: sub r6, r5, r3 802; P8BE-NEXT: srwi r6, r6, 1 803; P8BE-NEXT: add r3, r6, r3 804; P8BE-NEXT: rldicl r6, r4, 32, 48 805; P8BE-NEXT: srwi r3, r3, 6 806; P8BE-NEXT: clrlwi r6, r6, 27 807; P8BE-NEXT: mulli r3, r3, 95 808; P8BE-NEXT: mtvsrwz v2, r6 809; P8BE-NEXT: addi r6, r7, .LCPI3_0@toc@l 810; P8BE-NEXT: rldicl r4, r4, 48, 48 811; P8BE-NEXT: clrlwi r7, r8, 26 812; P8BE-NEXT: lxvw4x v3, 0, r6 813; P8BE-NEXT: clrlwi r4, r4, 29 814; P8BE-NEXT: mtvsrwz v4, r7 815; P8BE-NEXT: mtvsrwz v0, r4 816; P8BE-NEXT: sub r3, r5, r3 817; P8BE-NEXT: vperm v2, v4, v2, v3 818; P8BE-NEXT: mtvsrwz v5, r3 819; P8BE-NEXT: vperm v3, v0, v5, v3 820; P8BE-NEXT: vmrghw v2, v2, v3 821; P8BE-NEXT: blr 822 %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95> 823 ret <4 x i16> %1 824} 825 826; Don't fold if the divisor is one. The lane with divisor 1 is expected to be materialized as the constant 0 (li 0) with no multiply sequence; the 654, 23 and 5423 lanes each use mulhwu/srwi/mulli/sub. 827define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { 828; P9LE-LABEL: dont_fold_urem_one: 829; P9LE: # %bb.0: 830; P9LE-NEXT: li r3, 4 831; P9LE-NEXT: lis r4, -19946 832; P9LE-NEXT: lis r5, -14230 833; P9LE-NEXT: vextuhrx r3, r3, v2 834; P9LE-NEXT: ori r4, r4, 17097 835; P9LE-NEXT: ori r5, r5, 30865 836; P9LE-NEXT: clrlwi r3, r3, 16 837; P9LE-NEXT: mulhwu r4, r3, r4 838; P9LE-NEXT: srwi r4, r4, 4 839; P9LE-NEXT: mulli r4, r4, 23 840; P9LE-NEXT: sub r3, r3, r4 841; P9LE-NEXT: lis r4, 24749 842; P9LE-NEXT: mtvsrd v3, r3 843; P9LE-NEXT: li r3, 6 844; P9LE-NEXT: ori r4, r4, 47143 845; P9LE-NEXT: vextuhrx r3, r3, v2 846; P9LE-NEXT: clrlwi r3, r3, 16 847; P9LE-NEXT: mulhwu r4, r3, r4 848; P9LE-NEXT: srwi r4, r4, 11 849; P9LE-NEXT: mulli r4, r4, 5423 850; P9LE-NEXT: sub r3, r3, r4 851; P9LE-NEXT: mtvsrd v4, r3 852; P9LE-NEXT: li r3, 2 853; P9LE-NEXT: vextuhrx r3, r3, v2 854; P9LE-NEXT: vmrghh v3, v4, v3 855; P9LE-NEXT: clrlwi r4, r3, 16 856; P9LE-NEXT: rlwinm r3, r3, 31, 17, 31 857; P9LE-NEXT: mulhwu r3, r3, r5 858; P9LE-NEXT: srwi r3, r3, 8 859; P9LE-NEXT: mulli r3, r3, 654 860; P9LE-NEXT: sub r3, r4, r3 861; P9LE-NEXT: mtvsrd v2, r3 862; 
P9LE-NEXT: li r3, 0 863; P9LE-NEXT: mtvsrd v4, r3 864; P9LE-NEXT: vmrghh v2, v2, v4 865; P9LE-NEXT: vmrglw v2, v3, v2 866; P9LE-NEXT: blr 867; 868; P9BE-LABEL: dont_fold_urem_one: 869; P9BE: # %bb.0: 870; P9BE-NEXT: li r3, 6 871; P9BE-NEXT: lis r4, 24749 872; P9BE-NEXT: lis r5, -14230 873; P9BE-NEXT: vextuhlx r3, r3, v2 874; P9BE-NEXT: ori r4, r4, 47143 875; P9BE-NEXT: ori r5, r5, 30865 876; P9BE-NEXT: clrlwi r3, r3, 16 877; P9BE-NEXT: mulhwu r4, r3, r4 878; P9BE-NEXT: srwi r4, r4, 11 879; P9BE-NEXT: mulli r4, r4, 5423 880; P9BE-NEXT: sub r3, r3, r4 881; P9BE-NEXT: lis r4, -19946 882; P9BE-NEXT: mtvsrwz v3, r3 883; P9BE-NEXT: li r3, 4 884; P9BE-NEXT: ori r4, r4, 17097 885; P9BE-NEXT: vextuhlx r3, r3, v2 886; P9BE-NEXT: clrlwi r3, r3, 16 887; P9BE-NEXT: mulhwu r4, r3, r4 888; P9BE-NEXT: srwi r4, r4, 4 889; P9BE-NEXT: mulli r4, r4, 23 890; P9BE-NEXT: sub r3, r3, r4 891; P9BE-NEXT: mtvsrwz v4, r3 892; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha 893; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l 894; P9BE-NEXT: lxv v5, 0(r3) 895; P9BE-NEXT: li r3, 2 896; P9BE-NEXT: vextuhlx r3, r3, v2 897; P9BE-NEXT: clrlwi r4, r3, 16 898; P9BE-NEXT: rlwinm r3, r3, 31, 17, 31 899; P9BE-NEXT: vperm v3, v4, v3, v5 900; P9BE-NEXT: mulhwu r3, r3, r5 901; P9BE-NEXT: srwi r3, r3, 8 902; P9BE-NEXT: mulli r3, r3, 654 903; P9BE-NEXT: sub r3, r4, r3 904; P9BE-NEXT: mtvsrwz v2, r3 905; P9BE-NEXT: li r3, 0 906; P9BE-NEXT: mtvsrwz v4, r3 907; P9BE-NEXT: vperm v2, v4, v2, v5 908; P9BE-NEXT: vmrghw v2, v2, v3 909; P9BE-NEXT: blr 910; 911; P8LE-LABEL: dont_fold_urem_one: 912; P8LE: # %bb.0: 913; P8LE-NEXT: xxswapd vs0, v2 914; P8LE-NEXT: lis r3, -14230 915; P8LE-NEXT: lis r7, -19946 916; P8LE-NEXT: lis r9, 24749 917; P8LE-NEXT: ori r3, r3, 30865 918; P8LE-NEXT: ori r7, r7, 17097 919; P8LE-NEXT: mffprd r4, f0 920; P8LE-NEXT: rldicl r5, r4, 48, 48 921; P8LE-NEXT: rldicl r6, r4, 32, 48 922; P8LE-NEXT: rldicl r4, r4, 16, 48 923; P8LE-NEXT: rlwinm r8, r5, 31, 17, 31 924; P8LE-NEXT: clrlwi r6, r6, 16 925; P8LE-NEXT: 
clrlwi r5, r5, 16 926; P8LE-NEXT: mulhwu r3, r8, r3 927; P8LE-NEXT: ori r8, r9, 47143 928; P8LE-NEXT: clrlwi r4, r4, 16 929; P8LE-NEXT: li r9, 0 930; P8LE-NEXT: mulhwu r7, r6, r7 931; P8LE-NEXT: mulhwu r8, r4, r8 932; P8LE-NEXT: mtvsrd v2, r9 933; P8LE-NEXT: srwi r3, r3, 8 934; P8LE-NEXT: srwi r7, r7, 4 935; P8LE-NEXT: mulli r3, r3, 654 936; P8LE-NEXT: srwi r8, r8, 11 937; P8LE-NEXT: mulli r7, r7, 23 938; P8LE-NEXT: mulli r8, r8, 5423 939; P8LE-NEXT: sub r3, r5, r3 940; P8LE-NEXT: sub r5, r6, r7 941; P8LE-NEXT: mtvsrd v3, r3 942; P8LE-NEXT: sub r3, r4, r8 943; P8LE-NEXT: mtvsrd v4, r5 944; P8LE-NEXT: mtvsrd v5, r3 945; P8LE-NEXT: vmrghh v2, v3, v2 946; P8LE-NEXT: vmrghh v3, v5, v4 947; P8LE-NEXT: vmrglw v2, v3, v2 948; P8LE-NEXT: blr 949; 950; P8BE-LABEL: dont_fold_urem_one: 951; P8BE: # %bb.0: 952; P8BE-NEXT: mfvsrd r4, v2 953; P8BE-NEXT: lis r3, 24749 954; P8BE-NEXT: lis r7, -19946 955; P8BE-NEXT: lis r8, -14230 956; P8BE-NEXT: li r10, 0 957; P8BE-NEXT: ori r3, r3, 47143 958; P8BE-NEXT: ori r7, r7, 17097 959; P8BE-NEXT: ori r8, r8, 30865 960; P8BE-NEXT: mtvsrwz v2, r10 961; P8BE-NEXT: clrldi r5, r4, 48 962; P8BE-NEXT: rldicl r6, r4, 48, 48 963; P8BE-NEXT: clrlwi r5, r5, 16 964; P8BE-NEXT: rldicl r4, r4, 32, 48 965; P8BE-NEXT: clrlwi r6, r6, 16 966; P8BE-NEXT: mulhwu r3, r5, r3 967; P8BE-NEXT: rlwinm r9, r4, 31, 17, 31 968; P8BE-NEXT: mulhwu r7, r6, r7 969; P8BE-NEXT: mulhwu r8, r9, r8 970; P8BE-NEXT: addis r9, r2, .LCPI4_0@toc@ha 971; P8BE-NEXT: srwi r3, r3, 11 972; P8BE-NEXT: mulli r3, r3, 5423 973; P8BE-NEXT: srwi r7, r7, 4 974; P8BE-NEXT: srwi r8, r8, 8 975; P8BE-NEXT: mulli r7, r7, 23 976; P8BE-NEXT: mulli r8, r8, 654 977; P8BE-NEXT: sub r3, r5, r3 978; P8BE-NEXT: addi r5, r9, .LCPI4_0@toc@l 979; P8BE-NEXT: mtvsrwz v4, r3 980; P8BE-NEXT: clrlwi r3, r4, 16 981; P8BE-NEXT: lxvw4x v3, 0, r5 982; P8BE-NEXT: sub r5, r6, r7 983; P8BE-NEXT: sub r3, r3, r8 984; P8BE-NEXT: mtvsrwz v5, r5 985; P8BE-NEXT: mtvsrwz v0, r3 986; P8BE-NEXT: vperm v4, v5, v4, v3 987; 
P8BE-NEXT: vperm v2, v2, v0, v3 988; P8BE-NEXT: vmrghw v2, v2, v4 989; P8BE-NEXT: blr 990 %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423> 991 ret <4 x i16> %1 992} 993 994; Don't fold if the divisor is 2^16. NOTE(review): 65536 is not representable in i16, and the function name says smax (32767) while this comment says 2^16; the only expected output is a bare blr, presumably because the out-of-range lane lets the whole operation fold away. Confirm the intended divisor before regenerating the checks. 995define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { 996; CHECK-LABEL: dont_fold_urem_i16_smax: 997; CHECK: # %bb.0: 998; CHECK-NEXT: blr 999 %1 = urem <4 x i16> %x, <i16 1, i16 65536, i16 23, i16 5423> 1000 ret <4 x i16> %1 1001} 1002 1003; Don't fold i64 urem. 1004define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { 1005; P9LE-LABEL: dont_fold_urem_i64: 1006; P9LE: # %bb.0: 1007; P9LE-NEXT: lis r4, 1602 1008; P9LE-NEXT: mfvsrld r3, v3 1009; P9LE-NEXT: ori r4, r4, 51289 1010; P9LE-NEXT: rldic r4, r4, 36, 1 1011; P9LE-NEXT: oris r4, r4, 45590 1012; P9LE-NEXT: ori r4, r4, 17097 1013; P9LE-NEXT: mulhdu r4, r3, r4 1014; P9LE-NEXT: sub r5, r3, r4 1015; P9LE-NEXT: rldicl r5, r5, 63, 1 1016; P9LE-NEXT: add r4, r5, r4 1017; P9LE-NEXT: lis r5, -16037 1018; P9LE-NEXT: rldicl r4, r4, 60, 4 1019; P9LE-NEXT: ori r5, r5, 28749 1020; P9LE-NEXT: mulli r4, r4, 23 1021; P9LE-NEXT: rldic r5, r5, 32, 0 1022; P9LE-NEXT: oris r5, r5, 52170 1023; P9LE-NEXT: ori r5, r5, 12109 1024; P9LE-NEXT: sub r3, r3, r4 1025; P9LE-NEXT: mfvsrd r4, v3 1026; P9LE-NEXT: mulhdu r5, r4, r5 1027; P9LE-NEXT: rldicl r5, r5, 52, 12 1028; P9LE-NEXT: mulli r5, r5, 5423 1029; P9LE-NEXT: sub r4, r4, r5 1030; P9LE-NEXT: lis r5, 3206 1031; P9LE-NEXT: ori r5, r5, 42889 1032; P9LE-NEXT: mtvsrdd v3, r4, r3 1033; P9LE-NEXT: mfvsrd r3, v2 1034; P9LE-NEXT: rldic r5, r5, 35, 1 1035; P9LE-NEXT: rldicl r4, r3, 63, 1 1036; P9LE-NEXT: oris r5, r5, 1603 1037; P9LE-NEXT: ori r5, r5, 21445 1038; P9LE-NEXT: mulhdu r4, r4, r5 1039; P9LE-NEXT: rldicl r4, r4, 57, 7 1040; P9LE-NEXT: mulli r4, r4, 654 1041; P9LE-NEXT: sub r3, r3, r4 1042; P9LE-NEXT: li r4, 0 1043; P9LE-NEXT: mtvsrdd v2, r3, r4 1044; P9LE-NEXT: blr 1045; 1046; P9BE-LABEL: dont_fold_urem_i64: 1047; P9BE: # %bb.0: 1048; P9BE-NEXT: lis r4, 1602 
1049; P9BE-NEXT: mfvsrd r3, v3 1050; P9BE-NEXT: ori r4, r4, 51289 1051; P9BE-NEXT: rldic r4, r4, 36, 1 1052; P9BE-NEXT: oris r4, r4, 45590 1053; P9BE-NEXT: ori r4, r4, 17097 1054; P9BE-NEXT: mulhdu r4, r3, r4 1055; P9BE-NEXT: sub r5, r3, r4 1056; P9BE-NEXT: rldicl r5, r5, 63, 1 1057; P9BE-NEXT: add r4, r5, r4 1058; P9BE-NEXT: lis r5, -16037 1059; P9BE-NEXT: rldicl r4, r4, 60, 4 1060; P9BE-NEXT: ori r5, r5, 28749 1061; P9BE-NEXT: mulli r4, r4, 23 1062; P9BE-NEXT: rldic r5, r5, 32, 0 1063; P9BE-NEXT: oris r5, r5, 52170 1064; P9BE-NEXT: ori r5, r5, 12109 1065; P9BE-NEXT: sub r3, r3, r4 1066; P9BE-NEXT: mfvsrld r4, v3 1067; P9BE-NEXT: mulhdu r5, r4, r5 1068; P9BE-NEXT: rldicl r5, r5, 52, 12 1069; P9BE-NEXT: mulli r5, r5, 5423 1070; P9BE-NEXT: sub r4, r4, r5 1071; P9BE-NEXT: lis r5, 3206 1072; P9BE-NEXT: ori r5, r5, 42889 1073; P9BE-NEXT: mtvsrdd v3, r3, r4 1074; P9BE-NEXT: mfvsrld r3, v2 1075; P9BE-NEXT: rldic r5, r5, 35, 1 1076; P9BE-NEXT: rldicl r4, r3, 63, 1 1077; P9BE-NEXT: oris r5, r5, 1603 1078; P9BE-NEXT: ori r5, r5, 21445 1079; P9BE-NEXT: mulhdu r4, r4, r5 1080; P9BE-NEXT: rldicl r4, r4, 57, 7 1081; P9BE-NEXT: mulli r4, r4, 654 1082; P9BE-NEXT: sub r3, r3, r4 1083; P9BE-NEXT: mtvsrdd v2, 0, r3 1084; P9BE-NEXT: blr 1085; 1086; P8LE-LABEL: dont_fold_urem_i64: 1087; P8LE: # %bb.0: 1088; P8LE-NEXT: lis r3, 1602 1089; P8LE-NEXT: xxswapd vs0, v3 1090; P8LE-NEXT: lis r4, -16037 1091; P8LE-NEXT: lis r5, 3206 1092; P8LE-NEXT: mfvsrd r6, v2 1093; P8LE-NEXT: ori r3, r3, 51289 1094; P8LE-NEXT: ori r4, r4, 28749 1095; P8LE-NEXT: ori r5, r5, 42889 1096; P8LE-NEXT: mfvsrd r8, v3 1097; P8LE-NEXT: rldic r3, r3, 36, 1 1098; P8LE-NEXT: rldic r4, r4, 32, 0 1099; P8LE-NEXT: oris r3, r3, 45590 1100; P8LE-NEXT: mffprd r7, f0 1101; P8LE-NEXT: rldic r5, r5, 35, 1 1102; P8LE-NEXT: oris r4, r4, 52170 1103; P8LE-NEXT: ori r3, r3, 17097 1104; P8LE-NEXT: oris r5, r5, 1603 1105; P8LE-NEXT: ori r4, r4, 12109 1106; P8LE-NEXT: mulhdu r3, r7, r3 1107; P8LE-NEXT: rldicl r9, r6, 63, 1 1108; 
P8LE-NEXT: ori r5, r5, 21445 1109; P8LE-NEXT: mulhdu r4, r8, r4 1110; P8LE-NEXT: mulhdu r5, r9, r5 1111; P8LE-NEXT: sub r9, r7, r3 1112; P8LE-NEXT: rldicl r9, r9, 63, 1 1113; P8LE-NEXT: rldicl r4, r4, 52, 12 1114; P8LE-NEXT: add r3, r9, r3 1115; P8LE-NEXT: rldicl r5, r5, 57, 7 1116; P8LE-NEXT: mulli r4, r4, 5423 1117; P8LE-NEXT: rldicl r3, r3, 60, 4 1118; P8LE-NEXT: mulli r5, r5, 654 1119; P8LE-NEXT: mulli r3, r3, 23 1120; P8LE-NEXT: sub r4, r8, r4 1121; P8LE-NEXT: sub r5, r6, r5 1122; P8LE-NEXT: mtfprd f0, r4 1123; P8LE-NEXT: sub r3, r7, r3 1124; P8LE-NEXT: li r4, 0 1125; P8LE-NEXT: mtfprd f1, r5 1126; P8LE-NEXT: mtfprd f2, r3 1127; P8LE-NEXT: mtfprd f3, r4 1128; P8LE-NEXT: xxmrghd v3, vs0, vs2 1129; P8LE-NEXT: xxmrghd v2, vs1, vs3 1130; P8LE-NEXT: blr 1131; 1132; P8BE-LABEL: dont_fold_urem_i64: 1133; P8BE: # %bb.0: 1134; P8BE-NEXT: lis r3, 1602 1135; P8BE-NEXT: lis r4, -16037 1136; P8BE-NEXT: xxswapd vs0, v3 1137; P8BE-NEXT: xxswapd vs1, v2 1138; P8BE-NEXT: lis r5, 3206 1139; P8BE-NEXT: ori r3, r3, 51289 1140; P8BE-NEXT: ori r4, r4, 28749 1141; P8BE-NEXT: mfvsrd r6, v3 1142; P8BE-NEXT: ori r5, r5, 42889 1143; P8BE-NEXT: rldic r3, r3, 36, 1 1144; P8BE-NEXT: rldic r4, r4, 32, 0 1145; P8BE-NEXT: oris r3, r3, 45590 1146; P8BE-NEXT: rldic r5, r5, 35, 1 1147; P8BE-NEXT: mffprd r7, f0 1148; P8BE-NEXT: oris r4, r4, 52170 1149; P8BE-NEXT: ori r3, r3, 17097 1150; P8BE-NEXT: mffprd r8, f1 1151; P8BE-NEXT: oris r5, r5, 1603 1152; P8BE-NEXT: ori r4, r4, 12109 1153; P8BE-NEXT: mulhdu r3, r6, r3 1154; P8BE-NEXT: ori r5, r5, 21445 1155; P8BE-NEXT: mulhdu r4, r7, r4 1156; P8BE-NEXT: rldicl r9, r8, 63, 1 1157; P8BE-NEXT: mulhdu r5, r9, r5 1158; P8BE-NEXT: sub r9, r6, r3 1159; P8BE-NEXT: rldicl r9, r9, 63, 1 1160; P8BE-NEXT: rldicl r4, r4, 52, 12 1161; P8BE-NEXT: add r3, r9, r3 1162; P8BE-NEXT: mulli r4, r4, 5423 1163; P8BE-NEXT: rldicl r5, r5, 57, 7 1164; P8BE-NEXT: rldicl r3, r3, 60, 4 1165; P8BE-NEXT: mulli r5, r5, 654 1166; P8BE-NEXT: mulli r3, r3, 23 1167; P8BE-NEXT: sub r4, 
r7, r4 1168; P8BE-NEXT: mtfprd f0, r4 1169; P8BE-NEXT: sub r4, r8, r5 1170; P8BE-NEXT: sub r3, r6, r3 1171; P8BE-NEXT: mtfprd f1, r4 1172; P8BE-NEXT: li r4, 0 1173; P8BE-NEXT: mtfprd f2, r3 1174; P8BE-NEXT: mtfprd f3, r4 1175; P8BE-NEXT: xxmrghd v3, vs2, vs0 1176; P8BE-NEXT: xxmrghd v2, vs3, vs1 1177; P8BE-NEXT: blr 1178 %1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423> 1179 ret <4 x i64> %1 1180} 1181