1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s 3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s 4 5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. v_urem_i64 below covers an i64 urem on VGPR operands, and both RUN lines share the common CHECK prefix for it. The checked code ORs v1 and v3 (the high dwords of %num and %den) and compares the result with zero - lanes where it is non-zero take the full 64-bit expansion in %bb.1, the remaining lanes take the 32-bit sequence in %bb.3, which sets the high result dword (v5) to 0. 6 7define i64 @v_urem_i64(i64 %num, i64 %den) { 8; CHECK-LABEL: v_urem_i64: 9; CHECK: ; %bb.0: 10; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11; CHECK-NEXT: v_or_b32_e32 v5, v1, v3 12; CHECK-NEXT: v_mov_b32_e32 v4, 0 13; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] 14; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 15; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] 16; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5 17; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 18; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7] 19; CHECK-NEXT: s_cbranch_execz BB0_2 20; CHECK-NEXT: ; %bb.1: 21; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2 22; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v3 23; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v2 24; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc 25; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 26; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4 27; CHECK-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 28; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 29; CHECK-NEXT: v_trunc_f32_e32 v5, v5 30; CHECK-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 31; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 32; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 33; CHECK-NEXT: v_mul_lo_u32 v8, v6, v5 34; CHECK-NEXT: v_mul_lo_u32 v9, v6, v4 35; CHECK-NEXT: v_mul_lo_u32 v10, v7, v4 36; CHECK-NEXT: v_mul_hi_u32 v11, v6, v4 37; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 38; CHECK-NEXT: v_mul_lo_u32 v10, v5, v9 39; CHECK-NEXT: v_mul_hi_u32 v12, v4, v9 40; 
CHECK-NEXT: v_mul_hi_u32 v9, v5, v9 41; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 42; CHECK-NEXT: v_mul_lo_u32 v11, v4, v8 43; CHECK-NEXT: v_mul_lo_u32 v13, v5, v8 44; CHECK-NEXT: v_mul_hi_u32 v14, v4, v8 45; CHECK-NEXT: v_mul_hi_u32 v8, v5, v8 46; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 47; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 48; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 49; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 50; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 51; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 52; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 53; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 54; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 55; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 56; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 57; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 58; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 59; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 60; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9 61; CHECK-NEXT: v_addc_u32_e64 v9, s[4:5], v5, v8, vcc 62; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v8 63; CHECK-NEXT: v_mul_lo_u32 v8, v6, v4 64; CHECK-NEXT: v_mul_lo_u32 v7, v7, v4 65; CHECK-NEXT: v_mul_hi_u32 v10, v6, v4 66; CHECK-NEXT: v_mul_lo_u32 v6, v6, v9 67; CHECK-NEXT: v_mul_lo_u32 v11, v9, v8 68; CHECK-NEXT: v_mul_hi_u32 v12, v4, v8 69; CHECK-NEXT: v_mul_hi_u32 v8, v9, v8 70; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 71; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 72; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 73; CHECK-NEXT: v_mul_lo_u32 v10, v9, v6 74; CHECK-NEXT: v_mul_hi_u32 v13, v4, v6 75; CHECK-NEXT: v_mul_hi_u32 v6, v9, v6 76; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 77; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 78; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v10, v8 79; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 80; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v12 81; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] 82; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v13 83; CHECK-NEXT: 
v_cndmask_b32_e64 v11, 0, 1, s[4:5] 84; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v7 85; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v11 86; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 87; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 88; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 89; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 90; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc 91; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 92; CHECK-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 93; CHECK-NEXT: v_mul_lo_u32 v6, v1, v4 94; CHECK-NEXT: v_mul_hi_u32 v7, v0, v4 95; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 96; CHECK-NEXT: v_mul_lo_u32 v8, v0, v5 97; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5 98; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 99; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5 100; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 101; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 102; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4 103; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 104; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 105; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 106; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10 107; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 108; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 109; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 110; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 111; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 112; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 113; CHECK-NEXT: v_mul_lo_u32 v7, v2, v4 114; CHECK-NEXT: v_mul_lo_u32 v8, v3, v4 115; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 116; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 117; CHECK-NEXT: v_mul_lo_u32 v5, v2, v5 118; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 119; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 120; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v7 121; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v4, vcc 122; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v4 123; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v2 124; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 125; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v3 126; 
CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] 127; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc 128; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v6, v3 129; CHECK-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc 130; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v5, v2 131; CHECK-NEXT: v_subbrev_u32_e64 v8, s[4:5], 0, v1, vcc 132; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v2 133; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 134; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc 135; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v8, v3 136; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc 137; CHECK-NEXT: v_sub_i32_e32 v11, vcc, v7, v2 138; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 139; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v8, v3 140; CHECK-NEXT: v_cndmask_b32_e32 v3, v10, v9, vcc 141; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 142; CHECK-NEXT: v_cndmask_b32_e32 v3, v7, v11, vcc 143; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc 144; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 145; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc 146; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v1, vcc 147; CHECK-NEXT: BB0_2: ; %Flow 148; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] 149; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5] 150; CHECK-NEXT: s_cbranch_execz BB0_4 151; CHECK-NEXT: ; %bb.3: 152; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v2 153; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v2 154; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 155; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 156; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 157; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1 158; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 159; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3 160; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 161; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2 162; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 163; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2 164; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 165; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 166; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2 167; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 168; CHECK-NEXT: v_cndmask_b32_e32 
v4, v0, v1, vcc 169; CHECK-NEXT: v_mov_b32_e32 v5, 0 170; CHECK-NEXT: BB0_4: 171; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] 172; CHECK-NEXT: v_mov_b32_e32 v0, v4 173; CHECK-NEXT: v_mov_b32_e32 v1, v5 174; CHECK-NEXT: s_setpc_b64 s[30:31] 175 %result = urem i64 %num, %den 176 ret i64 %result 177} 178 179; FIXME: This is a workaround for not handling uniform VGPR case. NOTE(review): in s_urem_i64 below, %ins.1 inserts %res.0 (not %res.1) into element 1, so %res.1 is unused and the checked output only copies the low result dword (s_mov_b32 s1, s0) - the high half of the scalar urem result is therefore not covered. This looks unintentional - confirm, and if the operand is changed to %res.1, regenerate the assertions with utils/update_llc_test_checks.py rather than editing them by hand. 180declare i32 @llvm.amdgcn.readfirstlane(i32) 181 182define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { 183; CHECK-LABEL: s_urem_i64: 184; CHECK: ; %bb.0: 185; CHECK-NEXT: s_or_b64 s[4:5], s[0:1], s[2:3] 186; CHECK-NEXT: s_mov_b32 s6, 0 187; CHECK-NEXT: s_mov_b32 s7, -1 188; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] 189; CHECK-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], 0 190; CHECK-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, 1 191; CHECK-NEXT: s_xor_b64 vcc, s[4:5], s[6:7] 192; CHECK-NEXT: s_mov_b32 s4, 1 193; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 194; CHECK-NEXT: s_cbranch_vccz BB1_2 195; CHECK-NEXT: ; %bb.1: 196; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 197; CHECK-NEXT: v_mov_b32_e32 v1, s3 198; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s3 199; CHECK-NEXT: s_sub_u32 s6, 0, s2 200; CHECK-NEXT: s_cselect_b32 s4, 1, 0 201; CHECK-NEXT: v_mov_b32_e32 v3, s1 202; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v2 203; CHECK-NEXT: s_and_b32 s4, s4, 1 204; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 205; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 206; CHECK-NEXT: s_cmp_lg_u32 s4, 0 207; CHECK-NEXT: s_subb_u32 s7, 0, s3 208; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 209; CHECK-NEXT: v_trunc_f32_e32 v2, v2 210; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 211; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 212; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 213; CHECK-NEXT: v_mul_lo_u32 v4, s6, v2 214; CHECK-NEXT: v_mul_lo_u32 v5, s6, v0 215; CHECK-NEXT: v_mul_lo_u32 v6, s7, v0 216; CHECK-NEXT: v_mul_hi_u32 v7, s6, v0 217; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 218; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5 219; CHECK-NEXT: v_mul_hi_u32 v8, 
v0, v5 220; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 221; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 222; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 223; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4 224; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 225; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 226; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 227; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 228; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 229; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 230; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 231; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 232; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 233; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 234; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 235; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 236; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 237; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 238; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 239; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 240; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 241; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v2, v4, vcc 242; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v2, v4 243; CHECK-NEXT: v_mul_lo_u32 v4, s6, v0 244; CHECK-NEXT: v_mul_lo_u32 v6, s7, v0 245; CHECK-NEXT: v_mul_hi_u32 v7, s6, v0 246; CHECK-NEXT: v_mul_lo_u32 v8, s6, v5 247; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 248; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 249; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 250; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 251; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 252; CHECK-NEXT: v_mul_lo_u32 v7, v0, v6 253; CHECK-NEXT: v_mul_lo_u32 v8, v5, v6 254; CHECK-NEXT: v_mul_hi_u32 v11, v0, v6 255; CHECK-NEXT: v_mul_hi_u32 v5, v5, v6 256; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v9, v7 257; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] 258; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 259; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 260; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 261; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 262; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v11 263; 
CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 264; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 265; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v9 266; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 267; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 268; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 269; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 270; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc 271; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 272; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc 273; CHECK-NEXT: v_mul_lo_u32 v4, s1, v0 274; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 275; CHECK-NEXT: v_mul_hi_u32 v0, s1, v0 276; CHECK-NEXT: v_mul_lo_u32 v6, s0, v2 277; CHECK-NEXT: v_mul_lo_u32 v7, s1, v2 278; CHECK-NEXT: v_mul_hi_u32 v8, s0, v2 279; CHECK-NEXT: v_mul_hi_u32 v2, s1, v2 280; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 281; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 282; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 283; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 284; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 285; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 286; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 287; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 288; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 289; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 290; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 291; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 292; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 293; CHECK-NEXT: v_mul_lo_u32 v5, s2, v0 294; CHECK-NEXT: v_mul_lo_u32 v6, s3, v0 295; CHECK-NEXT: v_mul_hi_u32 v0, s2, v0 296; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 297; CHECK-NEXT: v_mul_lo_u32 v2, s2, v2 298; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 299; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 300; CHECK-NEXT: v_sub_i32_e32 v2, vcc, s0, v5 301; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v0, vcc 302; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], s1, v0 303; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v2 304; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 305; CHECK-NEXT: 
v_cmp_le_u32_e64 s[4:5], s3, v3 306; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] 307; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v1, vcc 308; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3 309; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc 310; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v2 311; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc 312; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 313; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc 314; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s2, v3 315; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 316; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc 317; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0 318; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc 319; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 320; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc 321; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 322; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 323; CHECK-NEXT: s_mov_b32 s4, 0 324; CHECK-NEXT: BB1_2: ; %Flow 325; CHECK-NEXT: s_and_b32 s1, s4, 1 326; CHECK-NEXT: s_cmp_lg_u32 s1, 0 327; CHECK-NEXT: s_cbranch_scc0 BB1_4 328; CHECK-NEXT: ; %bb.3: 329; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 330; CHECK-NEXT: s_sub_i32 s1, 0, s2 331; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 332; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 333; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 334; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0 335; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 336; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 337; CHECK-NEXT: v_mul_hi_u32 v0, s0, v0 338; CHECK-NEXT: v_mul_lo_u32 v0, v0, s2 339; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 340; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0 341; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0 342; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 343; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0 344; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0 345; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 346; CHECK-NEXT: BB1_4: 347; CHECK-NEXT: v_readfirstlane_b32 s0, v0 348; CHECK-NEXT: s_mov_b32 s1, s0 349; CHECK-NEXT: ; return to shader part epilog 350 
%result = urem i64 %num, %den 351 %cast = bitcast i64 %result to <2 x i32> 352 %elt.0 = extractelement <2 x i32> %cast, i32 0 353 %elt.1 = extractelement <2 x i32> %cast, i32 1 354 %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0) 355 %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1) 356 %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0 357 %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1 358 %cast.back = bitcast <2 x i32> %ins.1 to i64 359 ret i64 %cast.back 360} 361 362define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) { 363; GISEL-LABEL: v_urem_v2i64: 364; GISEL: ; %bb.0: 365; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 366; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 367; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5 368; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 369; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 370; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 371; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 372; GISEL-NEXT: v_trunc_f32_e32 v9, v9 373; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9 374; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 375; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 376; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 377; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc 378; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 379; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8 380; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 381; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 382; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 383; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 384; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 385; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13 386; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12 387; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 388; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 389; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 390; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 391; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 392; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 393; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 394; GISEL-NEXT: 
v_mul_hi_u32 v16, v8, v13 395; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 396; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 397; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 398; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 399; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 400; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 401; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 402; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 403; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 404; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 405; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 406; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v13, vcc 407; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 408; GISEL-NEXT: v_mul_lo_u32 v13, v10, v8 409; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8 410; GISEL-NEXT: v_mul_lo_u32 v14, v10, v12 411; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8 412; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 413; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 414; GISEL-NEXT: v_mul_lo_u32 v11, v12, v13 415; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 416; GISEL-NEXT: v_mul_hi_u32 v15, v8, v13 417; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 418; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 419; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 420; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 421; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 422; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 423; GISEL-NEXT: v_mul_hi_u32 v13, v12, v13 424; GISEL-NEXT: v_mul_hi_u32 v15, v8, v10 425; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 426; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 427; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 428; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 429; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 430; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 431; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 432; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 433; GISEL-NEXT: v_mul_hi_u32 v10, v12, v10 434; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, 
v13 435; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 436; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc 437; GISEL-NEXT: v_addc_u32_e64 v9, vcc, 0, v9, s[4:5] 438; GISEL-NEXT: v_mul_lo_u32 v10, v1, v8 439; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9 440; GISEL-NEXT: v_mul_hi_u32 v12, v0, v8 441; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 442; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 443; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 444; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 445; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 446; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 447; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 448; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 449; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 450; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 451; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 452; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 453; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 454; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 455; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 456; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 457; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 458; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 459; GISEL-NEXT: v_mul_lo_u32 v10, v4, v8 460; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8 461; GISEL-NEXT: v_mul_lo_u32 v9, v4, v9 462; GISEL-NEXT: v_mul_hi_u32 v8, v4, v8 463; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 464; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 465; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 466; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc 467; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 468; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v5 469; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 470; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 471; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 472; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v5 473; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] 474; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], v0, v4 475; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc 476; GISEL-NEXT: 
v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5] 477; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5 478; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc 479; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v4 480; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc 481; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v11, v5 482; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc 483; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v10, v4 484; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5] 485; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 486; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 487; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc 488; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc 489; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 490; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 491; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc 492; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v6 493; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v7 494; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 495; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 496; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 497; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 498; GISEL-NEXT: v_trunc_f32_e32 v5, v5 499; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 500; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 501; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 502; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 503; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc 504; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 505; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 506; GISEL-NEXT: v_mul_lo_u32 v12, v8, v5 507; GISEL-NEXT: v_mul_hi_u32 v13, v8, v4 508; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 509; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 510; GISEL-NEXT: v_mul_lo_u32 v12, v5, v10 511; GISEL-NEXT: v_mul_lo_u32 v13, v4, v11 512; GISEL-NEXT: v_mul_hi_u32 v14, v4, v10 513; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 514; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 515; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 516; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 517; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 518; 
GISEL-NEXT: v_mul_lo_u32 v13, v5, v11 519; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10 520; GISEL-NEXT: v_mul_hi_u32 v14, v4, v11 521; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 522; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 523; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 524; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 525; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 526; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 527; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 528; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 529; GISEL-NEXT: v_mul_hi_u32 v11, v5, v11 530; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 531; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 532; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v5, v11, vcc 533; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v11 534; GISEL-NEXT: v_mul_lo_u32 v11, v8, v4 535; GISEL-NEXT: v_mul_lo_u32 v9, v9, v4 536; GISEL-NEXT: v_mul_lo_u32 v12, v8, v10 537; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 538; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 539; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 540; GISEL-NEXT: v_mul_lo_u32 v9, v10, v11 541; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 542; GISEL-NEXT: v_mul_hi_u32 v13, v4, v11 543; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 544; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 545; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 546; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 547; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 548; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 549; GISEL-NEXT: v_mul_hi_u32 v11, v10, v11 550; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 551; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 552; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 553; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 554; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 555; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13 556; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 557; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 558; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 559; GISEL-NEXT: 
v_mul_hi_u32 v8, v10, v8 560; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 561; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v9 562; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc 563; GISEL-NEXT: v_addc_u32_e64 v5, vcc, 0, v5, s[4:5] 564; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 565; GISEL-NEXT: v_mul_lo_u32 v9, v2, v5 566; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 567; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 568; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 569; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 570; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 571; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 572; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5 573; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 574; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 575; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4 576; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 577; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 578; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 579; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 580; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 581; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 582; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 583; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 584; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 585; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4 586; GISEL-NEXT: v_mul_lo_u32 v9, v7, v4 587; GISEL-NEXT: v_mul_lo_u32 v5, v6, v5 588; GISEL-NEXT: v_mul_hi_u32 v4, v6, v4 589; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 590; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 591; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 592; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc 593; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 594; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v7 595; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 596; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 597; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 598; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v7 599; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5] 600; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v6 601; GISEL-NEXT: v_subb_u32_e32 v3, 
vcc, v3, v7, vcc 602; GISEL-NEXT: v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5] 603; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v7 604; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc 605; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v6 606; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 607; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v7 608; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc 609; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v8, v6 610; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5] 611; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 612; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 613; GISEL-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc 614; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc 615; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 616; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 617; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc 618; GISEL-NEXT: s_setpc_b64 s[30:31] 619; 620; CGP-LABEL: v_urem_v2i64: 621; CGP: ; %bb.0: 622; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 623; CGP-NEXT: v_mov_b32_e32 v8, v0 624; CGP-NEXT: v_mov_b32_e32 v9, v1 625; CGP-NEXT: v_or_b32_e32 v1, v9, v5 626; CGP-NEXT: v_mov_b32_e32 v0, 0 627; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 628; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 629; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] 630; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 631; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 632; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] 633; CGP-NEXT: s_cbranch_execz BB2_2 634; CGP-NEXT: ; %bb.1: 635; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 636; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5 637; CGP-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 638; CGP-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc 639; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 640; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 641; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 642; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 643; CGP-NEXT: v_trunc_f32_e32 v1, v1 644; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 645; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1 646; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 647; CGP-NEXT: 
v_mul_lo_u32 v12, v10, v1 648; CGP-NEXT: v_mul_lo_u32 v13, v10, v0 649; CGP-NEXT: v_mul_lo_u32 v14, v11, v0 650; CGP-NEXT: v_mul_hi_u32 v15, v10, v0 651; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 652; CGP-NEXT: v_mul_lo_u32 v14, v1, v13 653; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 654; CGP-NEXT: v_mul_hi_u32 v13, v1, v13 655; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 656; CGP-NEXT: v_mul_lo_u32 v15, v0, v12 657; CGP-NEXT: v_mul_lo_u32 v17, v1, v12 658; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 659; CGP-NEXT: v_mul_hi_u32 v12, v1, v12 660; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 661; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 662; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 663; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 664; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 665; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 666; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 667; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 668; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 669; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 670; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 671; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 672; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 673; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 674; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 675; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], v1, v12, vcc 676; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v1, v12 677; CGP-NEXT: v_mul_lo_u32 v12, v10, v0 678; CGP-NEXT: v_mul_lo_u32 v11, v11, v0 679; CGP-NEXT: v_mul_hi_u32 v14, v10, v0 680; CGP-NEXT: v_mul_lo_u32 v10, v10, v13 681; CGP-NEXT: v_mul_lo_u32 v15, v13, v12 682; CGP-NEXT: v_mul_hi_u32 v16, v0, v12 683; CGP-NEXT: v_mul_hi_u32 v12, v13, v12 684; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 685; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14 686; CGP-NEXT: v_mul_lo_u32 v11, v0, v10 687; CGP-NEXT: v_mul_lo_u32 v14, v13, v10 688; CGP-NEXT: v_mul_hi_u32 v17, v0, v10 689; CGP-NEXT: v_mul_hi_u32 v10, v13, v10 690; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v15, v11 691; CGP-NEXT: v_cndmask_b32_e64 
v13, 0, 1, s[4:5] 692; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 693; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 694; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 695; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 696; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 697; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 698; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 699; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v15 700; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 701; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 702; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 703; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v12 704; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v10, vcc 705; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v11 706; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 707; CGP-NEXT: v_mul_lo_u32 v10, v9, v0 708; CGP-NEXT: v_mul_hi_u32 v11, v8, v0 709; CGP-NEXT: v_mul_hi_u32 v0, v9, v0 710; CGP-NEXT: v_mul_lo_u32 v12, v8, v1 711; CGP-NEXT: v_mul_lo_u32 v13, v9, v1 712; CGP-NEXT: v_mul_hi_u32 v14, v8, v1 713; CGP-NEXT: v_mul_hi_u32 v1, v9, v1 714; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 715; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 716; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 717; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 718; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 719; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 720; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 721; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 722; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 723; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 724; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10 725; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 726; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 727; CGP-NEXT: v_mul_lo_u32 v11, v4, v0 728; CGP-NEXT: v_mul_lo_u32 v12, v5, v0 729; CGP-NEXT: v_mul_hi_u32 v0, v4, v0 730; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v10 731; CGP-NEXT: v_mul_lo_u32 v1, v4, v1 732; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 733; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 734; CGP-NEXT: v_sub_i32_e32 
v1, vcc, v8, v11 735; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v9, v0, vcc 736; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v9, v0 737; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4 738; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 739; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 740; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] 741; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc 742; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v5 743; CGP-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc 744; CGP-NEXT: v_sub_i32_e32 v11, vcc, v1, v4 745; CGP-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v0, vcc 746; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v4 747; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] 748; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc 749; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v5 750; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc 751; CGP-NEXT: v_sub_i32_e32 v15, vcc, v11, v4 752; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc 753; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v5 754; CGP-NEXT: v_cndmask_b32_e32 v5, v14, v13, vcc 755; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 756; CGP-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc 757; CGP-NEXT: v_cndmask_b32_e32 v11, v12, v0, vcc 758; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 759; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc 760; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc 761; CGP-NEXT: BB2_2: ; %Flow2 762; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] 763; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] 764; CGP-NEXT: s_cbranch_execz BB2_4 765; CGP-NEXT: ; %bb.3: 766; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 767; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4 768; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 769; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 770; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 771; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 772; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 773; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 774; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 775; CGP-NEXT: v_mul_lo_u32 v0, v0, v4 776; CGP-NEXT: v_sub_i32_e32 v0, vcc, v8, v0 777; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 778; CGP-NEXT: 
v_cmp_ge_u32_e32 vcc, v0, v4 779; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 780; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 781; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 782; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 783; CGP-NEXT: v_mov_b32_e32 v1, 0 784; CGP-NEXT: BB2_4: 785; CGP-NEXT: s_or_b64 exec, exec, s[4:5] 786; CGP-NEXT: v_or_b32_e32 v5, v3, v7 787; CGP-NEXT: v_mov_b32_e32 v4, 0 788; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] 789; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 790; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] 791; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5 792; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 793; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] 794; CGP-NEXT: s_cbranch_execz BB2_6 795; CGP-NEXT: ; %bb.5: 796; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6 797; CGP-NEXT: v_cvt_f32_u32_e32 v5, v7 798; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 799; CGP-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc 800; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 801; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 802; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 803; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 804; CGP-NEXT: v_trunc_f32_e32 v5, v5 805; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 806; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 807; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 808; CGP-NEXT: v_mul_lo_u32 v10, v8, v5 809; CGP-NEXT: v_mul_lo_u32 v11, v8, v4 810; CGP-NEXT: v_mul_lo_u32 v12, v9, v4 811; CGP-NEXT: v_mul_hi_u32 v13, v8, v4 812; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 813; CGP-NEXT: v_mul_lo_u32 v12, v5, v11 814; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 815; CGP-NEXT: v_mul_hi_u32 v11, v5, v11 816; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 817; CGP-NEXT: v_mul_lo_u32 v13, v4, v10 818; CGP-NEXT: v_mul_lo_u32 v15, v5, v10 819; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 820; CGP-NEXT: v_mul_hi_u32 v10, v5, v10 821; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 822; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 823; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 824; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 825; 
CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 826; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 827; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 828; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 829; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 830; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 831; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 832; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 833; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 834; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 835; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 836; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v5, v10, vcc 837; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v10 838; CGP-NEXT: v_mul_lo_u32 v10, v8, v4 839; CGP-NEXT: v_mul_lo_u32 v9, v9, v4 840; CGP-NEXT: v_mul_hi_u32 v12, v8, v4 841; CGP-NEXT: v_mul_lo_u32 v8, v8, v11 842; CGP-NEXT: v_mul_lo_u32 v13, v11, v10 843; CGP-NEXT: v_mul_hi_u32 v14, v4, v10 844; CGP-NEXT: v_mul_hi_u32 v10, v11, v10 845; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 846; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v12 847; CGP-NEXT: v_mul_lo_u32 v9, v4, v8 848; CGP-NEXT: v_mul_lo_u32 v12, v11, v8 849; CGP-NEXT: v_mul_hi_u32 v15, v4, v8 850; CGP-NEXT: v_mul_hi_u32 v8, v11, v8 851; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v13, v9 852; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 853; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 854; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 855; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v9, v14 856; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 857; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 858; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 859; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 860; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v13 861; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v10, v9 862; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 863; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 864; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 865; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc 866; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v9 867; CGP-NEXT: v_addc_u32_e32 v5, 
vcc, 0, v5, vcc 868; CGP-NEXT: v_mul_lo_u32 v8, v3, v4 869; CGP-NEXT: v_mul_hi_u32 v9, v2, v4 870; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 871; CGP-NEXT: v_mul_lo_u32 v10, v2, v5 872; CGP-NEXT: v_mul_lo_u32 v11, v3, v5 873; CGP-NEXT: v_mul_hi_u32 v12, v2, v5 874; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 875; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 876; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 877; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 878; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 879; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 880; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 881; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 882; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 883; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 884; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 885; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 886; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 887; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 888; CGP-NEXT: v_mul_lo_u32 v9, v6, v4 889; CGP-NEXT: v_mul_lo_u32 v10, v7, v4 890; CGP-NEXT: v_mul_hi_u32 v4, v6, v4 891; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 892; CGP-NEXT: v_mul_lo_u32 v5, v6, v5 893; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 894; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 895; CGP-NEXT: v_sub_i32_e32 v5, vcc, v2, v9 896; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v4, vcc 897; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 898; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v6 899; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 900; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v7 901; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 902; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc 903; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v7 904; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc 905; CGP-NEXT: v_sub_i32_e32 v9, vcc, v5, v6 906; CGP-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc 907; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v6 908; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] 909; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc 910; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v10, v7 911; CGP-NEXT: 
v_cndmask_b32_e64 v12, 0, -1, vcc 912; CGP-NEXT: v_sub_i32_e32 v13, vcc, v9, v6 913; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 914; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v7 915; CGP-NEXT: v_cndmask_b32_e32 v7, v12, v11, vcc 916; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 917; CGP-NEXT: v_cndmask_b32_e32 v7, v9, v13, vcc 918; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc 919; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 920; CGP-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc 921; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v3, vcc 922; CGP-NEXT: BB2_6: ; %Flow 923; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] 924; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] 925; CGP-NEXT: s_cbranch_execz BB2_8 926; CGP-NEXT: ; %bb.7: 927; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 928; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v6 929; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 930; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 931; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 932; CGP-NEXT: v_mul_lo_u32 v4, v4, v3 933; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 934; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 935; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 936; CGP-NEXT: v_mul_lo_u32 v3, v3, v6 937; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 938; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6 939; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 940; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 941; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6 942; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 943; CGP-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc 944; CGP-NEXT: v_mov_b32_e32 v5, 0 945; CGP-NEXT: BB2_8: 946; CGP-NEXT: s_or_b64 exec, exec, s[4:5] 947; CGP-NEXT: v_mov_b32_e32 v2, v4 948; CGP-NEXT: v_mov_b32_e32 v3, v5 949; CGP-NEXT: s_setpc_b64 s[30:31] 950 %result = urem <2 x i64> %num, %den 951 ret <2 x i64> %result 952} 953 ; Unsigned i64 remainder by the constant 4096 (0x1000). Both RUN configurations share one expected sequence for this function (common check prefix), and that sequence is the generic reciprocal-based 64-bit expansion with no single AND-mask shortcut for the power-of-two divisor. These assertions are autogenerated, so regenerate them with utils/update_llc_test_checks.py instead of editing by hand. 954define i64 @v_urem_i64_pow2k_denom(i64 %num) { 955; CHECK-LABEL: v_urem_i64_pow2k_denom: 956; CHECK: ; %bb.0: 957; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 958; CHECK-NEXT: s_movk_i32 s6, 0x1000 959; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0 960; CHECK-NEXT: s_mov_b32 s7, 
0xfffff000 961; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 962; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 963; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 964; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 965; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 966; CHECK-NEXT: v_trunc_f32_e32 v3, v3 967; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 968; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 969; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 970; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 971; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 972; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 973; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 974; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 975; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 976; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 977; CHECK-NEXT: v_mul_hi_u32 v5, v3, v5 978; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 979; CHECK-NEXT: v_mul_lo_u32 v7, v2, v4 980; CHECK-NEXT: v_mul_lo_u32 v9, v3, v4 981; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 982; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4 983; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 984; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 985; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 986; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 987; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 988; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 989; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 990; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 991; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 992; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 993; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 994; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 995; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 996; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 997; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 998; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc 999; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 1000; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 1001; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 1002; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 1003; CHECK-NEXT: v_mul_lo_u32 v8, s7, v5 1004; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 
1005; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 1006; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 1007; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 1008; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 1009; CHECK-NEXT: v_mul_lo_u32 v7, v2, v6 1010; CHECK-NEXT: v_mul_lo_u32 v8, v5, v6 1011; CHECK-NEXT: v_mul_hi_u32 v11, v2, v6 1012; CHECK-NEXT: v_mul_hi_u32 v5, v5, v6 1013; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v9, v7 1014; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] 1015; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 1016; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 1017; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 1018; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 1019; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v11 1020; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 1021; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 1022; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v9 1023; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 1024; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 1025; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 1026; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 1027; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc 1028; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 1029; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 1030; CHECK-NEXT: v_mul_lo_u32 v4, v1, v2 1031; CHECK-NEXT: v_mul_hi_u32 v5, v0, v2 1032; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 1033; CHECK-NEXT: v_mul_lo_u32 v6, v0, v3 1034; CHECK-NEXT: v_mul_lo_u32 v7, v1, v3 1035; CHECK-NEXT: v_mul_hi_u32 v8, v0, v3 1036; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 1037; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 1038; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1039; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 1040; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1041; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 1042; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1043; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 1044; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1045; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 1046; CHECK-NEXT: 
v_add_i32_e32 v5, vcc, v7, v5 1047; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 1048; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1049; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 1050; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 1051; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 1052; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 1053; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 1054; CHECK-NEXT: v_mul_lo_u32 v3, s6, v3 1055; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 1056; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 1057; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 1058; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v1, v2, vcc 1059; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 1060; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 1061; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] 1062; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v3 1063; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 1064; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1065; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 1066; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 1067; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s6, v0 1068; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1069; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v4 1070; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 1071; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 1072; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc 1073; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v4 1074; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc 1075; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1076; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc 1077; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 1078; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc 1079; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 1080; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 1081; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1082; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 1083; CHECK-NEXT: s_setpc_b64 s[30:31] 1084 %result = urem i64 %num, 4096 1085 ret i64 %result 1086} 1087 1088define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) { 1089; 
GISEL-LABEL: v_urem_v2i64_pow2k_denom: 1090; GISEL: ; %bb.0: 1091; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1092; GISEL-NEXT: s_movk_i32 s10, 0x1000 1093; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s10 1094; GISEL-NEXT: s_sub_u32 s8, 0, s10 1095; GISEL-NEXT: s_cselect_b32 s4, 1, 0 1096; GISEL-NEXT: v_cvt_f32_u32_e32 v5, 0 1097; GISEL-NEXT: v_mov_b32_e32 v6, v4 1098; GISEL-NEXT: s_and_b32 s4, s4, 1 1099; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 1100; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 1101; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 1102; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6 1103; GISEL-NEXT: s_cmp_lg_u32 s4, 0 1104; GISEL-NEXT: s_subb_u32 s9, 0, 0 1105; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 1106; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 1107; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 1108; GISEL-NEXT: s_sub_u32 s11, 0, s10 1109; GISEL-NEXT: s_cselect_b32 s4, 1, 0 1110; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 1111; GISEL-NEXT: v_trunc_f32_e32 v6, v6 1112; GISEL-NEXT: s_and_b32 s4, s4, 1 1113; GISEL-NEXT: v_trunc_f32_e32 v7, v7 1114; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 1115; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 1116; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 1117; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 1118; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 1119; GISEL-NEXT: s_cmp_lg_u32 s4, 0 1120; GISEL-NEXT: s_subb_u32 s6, 0, 0 1121; GISEL-NEXT: v_mul_lo_u32 v8, s11, v6 1122; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 1123; GISEL-NEXT: v_mul_lo_u32 v9, s8, v7 1124; GISEL-NEXT: v_mul_lo_u32 v10, s11, v4 1125; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 1126; GISEL-NEXT: v_mul_hi_u32 v12, s11, v4 1127; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5 1128; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 1129; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 1130; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 1131; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10 1132; GISEL-NEXT: v_mul_hi_u32 v16, v4, v10 1133; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 1134; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 
1135; GISEL-NEXT: v_mul_lo_u32 v14, v7, v13 1136; GISEL-NEXT: v_mul_hi_u32 v17, v5, v13 1137; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13 1138; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 1139; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 1140; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 1141; GISEL-NEXT: v_mul_lo_u32 v15, v6, v8 1142; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 1143; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 1144; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9 1145; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19 1146; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 1147; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 1148; GISEL-NEXT: v_mul_lo_u32 v14, v7, v9 1149; GISEL-NEXT: v_mul_hi_u32 v17, v5, v9 1150; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 1151; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 1152; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 1153; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 1154; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 1155; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 1156; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 1157; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 1158; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 1159; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 1160; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] 1161; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1162; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 1163; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 1164; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1165; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v16 1166; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18 1167; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 1168; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 1169; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1170; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 1171; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1172; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1173; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v15 1174; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 1175; GISEL-NEXT: 
v_add_i32_e32 v9, vcc, v9, v12 1176; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 1177; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc 1178; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 1179; GISEL-NEXT: v_mul_lo_u32 v8, s11, v4 1180; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 1181; GISEL-NEXT: v_mul_hi_u32 v12, s11, v4 1182; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 1183; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] 1184; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 1185; GISEL-NEXT: v_mul_lo_u32 v9, s8, v5 1186; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 1187; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 1188; GISEL-NEXT: v_mul_lo_u32 v16, s11, v10 1189; GISEL-NEXT: v_mul_lo_u32 v17, v10, v8 1190; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 1191; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 1192; GISEL-NEXT: v_mul_lo_u32 v19, s8, v13 1193; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 1194; GISEL-NEXT: v_mul_lo_u32 v16, v13, v9 1195; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19 1196; GISEL-NEXT: v_mul_hi_u32 v19, v5, v9 1197; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 1198; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 1199; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15 1200; GISEL-NEXT: v_mul_lo_u32 v14, v4, v11 1201; GISEL-NEXT: v_mul_lo_u32 v15, v5, v12 1202; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15 1203; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] 1204; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 1205; GISEL-NEXT: v_mul_lo_u32 v15, v10, v11 1206; GISEL-NEXT: v_mul_hi_u32 v19, v4, v11 1207; GISEL-NEXT: v_mul_hi_u32 v10, v10, v11 1208; GISEL-NEXT: v_mul_lo_u32 v11, v13, v12 1209; GISEL-NEXT: v_mul_hi_u32 v13, v13, v12 1210; GISEL-NEXT: v_mul_hi_u32 v12, v5, v12 1211; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 1212; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] 1213; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8 1214; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] 1215; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9 1216; GISEL-NEXT: v_cndmask_b32_e64 
v11, 0, 1, s[8:9] 1217; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18 1218; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] 1219; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19 1220; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 1221; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 1222; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 1223; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] 1224; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14 1225; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 1226; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19 1227; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 1228; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14 1229; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] 1230; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 1231; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] 1232; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12 1233; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14 1234; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 1235; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11 1236; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc 1237; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5] 1238; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 1239; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc 1240; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 1241; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 1242; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 1243; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 1244; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc 1245; GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 1246; GISEL-NEXT: v_mul_hi_u32 v11, v0, v5 1247; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 1248; GISEL-NEXT: v_mul_lo_u32 v12, v2, v6 1249; GISEL-NEXT: v_mul_lo_u32 v13, v3, v6 1250; GISEL-NEXT: v_mul_hi_u32 v14, v2, v6 1251; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 1252; GISEL-NEXT: v_mul_lo_u32 v15, v0, v7 1253; GISEL-NEXT: v_mul_lo_u32 v16, v1, v7 1254; GISEL-NEXT: v_mul_hi_u32 v17, v0, v7 1255; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 1256; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 
1257; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1258; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4 1259; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1260; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 1261; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1262; GISEL-NEXT: v_add_i32_e32 v5, vcc, v16, v5 1263; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1264; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 1265; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1266; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 1267; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1268; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 1269; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1270; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17 1271; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1272; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 1273; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 1274; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 1275; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 1276; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 1277; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1278; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 1279; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1280; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 1281; GISEL-NEXT: v_mul_lo_u32 v10, s10, v4 1282; GISEL-NEXT: v_mul_lo_u32 v12, 0, v4 1283; GISEL-NEXT: v_mul_hi_u32 v4, s10, v4 1284; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 1285; GISEL-NEXT: v_mul_lo_u32 v11, s10, v5 1286; GISEL-NEXT: v_mul_lo_u32 v13, 0, v5 1287; GISEL-NEXT: v_mul_hi_u32 v5, s10, v5 1288; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 1289; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 1290; GISEL-NEXT: v_mul_lo_u32 v6, s10, v6 1291; GISEL-NEXT: v_mul_lo_u32 v7, s10, v7 1292; GISEL-NEXT: v_add_i32_e32 v6, vcc, v12, v6 1293; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 1294; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 1295; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 1296; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 1297; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc 1298; 
GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 1299; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v2 1300; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 1301; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11 1302; GISEL-NEXT: v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5] 1303; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v5 1304; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v0 1305; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7] 1306; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v6 1307; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[6:7] 1308; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 1309; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v7 1310; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc 1311; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] 1312; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 1313; GISEL-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc 1314; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, s10, v2 1315; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 1316; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v8 1317; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc 1318; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 1319; GISEL-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc 1320; GISEL-NEXT: v_subrev_i32_e32 v9, vcc, s10, v0 1321; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1322; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v9 1323; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 1324; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 1325; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc 1326; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, s10, v8 1327; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc 1328; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 1329; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc 1330; GISEL-NEXT: v_subrev_i32_e32 v16, vcc, s10, v9 1331; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v1, vcc 1332; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 1333; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc 1334; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1335; GISEL-NEXT: v_cndmask_b32_e32 v11, v15, v11, vcc 1336; GISEL-NEXT: v_cmp_ne_u32_e32 
vcc, 0, v10 1337; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v13, vcc 1338; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 1339; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v16, s[4:5] 1340; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc 1341; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 1342; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 1343; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] 1344; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 1345; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[4:5] 1346; GISEL-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5] 1347; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc 1348; GISEL-NEXT: s_setpc_b64 s[30:31] 1349; 1350; CGP-LABEL: v_urem_v2i64_pow2k_denom: 1351; CGP: ; %bb.0: 1352; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1353; CGP-NEXT: s_movk_i32 s10, 0x1000 1354; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0 1355; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10 1356; CGP-NEXT: s_mov_b32 s8, 0xfffff000 1357; CGP-NEXT: v_mov_b32_e32 v6, v5 1358; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 1359; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 1360; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 1361; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 1362; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 1363; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 1364; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 1365; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 1366; CGP-NEXT: v_trunc_f32_e32 v6, v6 1367; CGP-NEXT: v_trunc_f32_e32 v7, v7 1368; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 1369; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 1370; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 1371; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 1372; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 1373; CGP-NEXT: v_mul_lo_u32 v8, s8, v6 1374; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 1375; CGP-NEXT: v_mul_lo_u32 v9, s8, v7 1376; CGP-NEXT: v_mul_lo_u32 v10, s8, v4 1377; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 1378; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 1379; CGP-NEXT: v_mul_lo_u32 v13, s8, v5 1380; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 1381; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 
1382; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 1383; CGP-NEXT: v_mul_lo_u32 v11, v6, v10 1384; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 1385; CGP-NEXT: v_mul_hi_u32 v10, v6, v10 1386; CGP-NEXT: v_add_i32_e32 v9, vcc, v14, v9 1387; CGP-NEXT: v_mul_lo_u32 v14, v7, v13 1388; CGP-NEXT: v_mul_hi_u32 v17, v5, v13 1389; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 1390; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 1391; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 1392; CGP-NEXT: v_mul_lo_u32 v12, v4, v8 1393; CGP-NEXT: v_mul_lo_u32 v15, v6, v8 1394; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 1395; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 1396; CGP-NEXT: v_mul_lo_u32 v19, v5, v9 1397; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 1398; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 1399; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 1400; CGP-NEXT: v_mul_lo_u32 v14, v7, v9 1401; CGP-NEXT: v_mul_hi_u32 v17, v5, v9 1402; CGP-NEXT: v_mul_hi_u32 v9, v7, v9 1403; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 1404; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 1405; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 1406; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 1407; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 1408; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 1409; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 1410; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 1411; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 1412; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] 1413; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1414; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 1415; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 1416; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1417; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v16 1418; CGP-NEXT: v_add_i32_e32 v15, vcc, v19, v18 1419; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 1420; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 1421; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1422; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 1423; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1424; CGP-NEXT: 
v_add_i32_e32 v11, vcc, v12, v11 1425; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v15 1426; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 1427; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 1428; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 1429; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc 1430; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 1431; CGP-NEXT: v_mul_lo_u32 v8, s8, v4 1432; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 1433; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 1434; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 1435; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] 1436; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 1437; CGP-NEXT: v_mul_lo_u32 v9, s8, v5 1438; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 1439; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 1440; CGP-NEXT: v_mul_lo_u32 v16, s8, v10 1441; CGP-NEXT: v_mul_lo_u32 v17, v10, v8 1442; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 1443; CGP-NEXT: v_mul_hi_u32 v8, v10, v8 1444; CGP-NEXT: v_mul_lo_u32 v19, s8, v13 1445; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 1446; CGP-NEXT: v_mul_lo_u32 v16, v13, v9 1447; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19 1448; CGP-NEXT: v_mul_hi_u32 v19, v5, v9 1449; CGP-NEXT: v_mul_hi_u32 v9, v13, v9 1450; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 1451; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15 1452; CGP-NEXT: v_mul_lo_u32 v14, v4, v11 1453; CGP-NEXT: v_mul_lo_u32 v15, v5, v12 1454; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15 1455; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] 1456; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 1457; CGP-NEXT: v_mul_lo_u32 v15, v10, v11 1458; CGP-NEXT: v_mul_hi_u32 v19, v4, v11 1459; CGP-NEXT: v_mul_hi_u32 v10, v10, v11 1460; CGP-NEXT: v_mul_lo_u32 v11, v13, v12 1461; CGP-NEXT: v_mul_hi_u32 v13, v13, v12 1462; CGP-NEXT: v_mul_hi_u32 v12, v5, v12 1463; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 1464; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] 1465; CGP-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8 1466; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] 1467; CGP-NEXT: 
v_add_i32_e64 v9, s[8:9], v11, v9 1468; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9] 1469; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18 1470; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] 1471; CGP-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19 1472; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 1473; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 1474; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 1475; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] 1476; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14 1477; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 1478; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19 1479; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 1480; CGP-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14 1481; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] 1482; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 1483; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] 1484; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12 1485; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14 1486; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 1487; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11 1488; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc 1489; CGP-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5] 1490; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 1491; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc 1492; CGP-NEXT: v_mul_lo_u32 v8, v3, v4 1493; CGP-NEXT: v_mul_hi_u32 v10, v2, v4 1494; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 1495; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 1496; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc 1497; CGP-NEXT: v_mul_lo_u32 v9, v1, v5 1498; CGP-NEXT: v_mul_hi_u32 v11, v0, v5 1499; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 1500; CGP-NEXT: v_mul_lo_u32 v12, v2, v6 1501; CGP-NEXT: v_mul_lo_u32 v13, v3, v6 1502; CGP-NEXT: v_mul_hi_u32 v14, v2, v6 1503; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 1504; CGP-NEXT: v_mul_lo_u32 v15, v0, v7 1505; CGP-NEXT: v_mul_lo_u32 v16, v1, v7 1506; CGP-NEXT: v_mul_hi_u32 v17, v0, v7 1507; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 1508; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 1509; CGP-NEXT: 
v_cndmask_b32_e64 v12, 0, 1, vcc 1510; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 1511; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1512; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 1513; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1514; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5 1515; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1516; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 1517; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1518; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 1519; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1520; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 1521; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1522; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17 1523; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1524; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8 1525; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 1526; CGP-NEXT: v_add_i32_e32 v9, vcc, v15, v9 1527; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 1528; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 1529; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1530; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 1531; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1532; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 1533; CGP-NEXT: v_mul_lo_u32 v10, s10, v4 1534; CGP-NEXT: v_mul_lo_u32 v12, 0, v4 1535; CGP-NEXT: v_mul_hi_u32 v4, s10, v4 1536; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 1537; CGP-NEXT: v_mul_lo_u32 v11, s10, v5 1538; CGP-NEXT: v_mul_lo_u32 v13, 0, v5 1539; CGP-NEXT: v_mul_hi_u32 v5, s10, v5 1540; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 1541; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 1542; CGP-NEXT: v_mul_lo_u32 v6, s10, v6 1543; CGP-NEXT: v_mul_lo_u32 v7, s10, v7 1544; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 1545; CGP-NEXT: v_add_i32_e32 v7, vcc, v13, v7 1546; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 1547; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 1548; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 1549; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc 1550; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 1551; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v2 1552; CGP-NEXT: 
v_cndmask_b32_e64 v4, 0, -1, s[4:5] 1553; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11 1554; CGP-NEXT: v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5] 1555; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v5 1556; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v0 1557; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7] 1558; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v6 1559; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[6:7] 1560; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 1561; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v7 1562; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc 1563; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] 1564; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 1565; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc 1566; CGP-NEXT: v_subrev_i32_e32 v8, vcc, s10, v2 1567; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 1568; CGP-NEXT: v_cmp_le_u32_e32 vcc, s10, v8 1569; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc 1570; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 1571; CGP-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc 1572; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s10, v0 1573; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1574; CGP-NEXT: v_cmp_le_u32_e32 vcc, s10, v9 1575; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 1576; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 1577; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc 1578; CGP-NEXT: v_subrev_i32_e32 v13, vcc, s10, v8 1579; CGP-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc 1580; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 1581; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc 1582; CGP-NEXT: v_subrev_i32_e32 v16, vcc, s10, v9 1583; CGP-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v1, vcc 1584; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 1585; CGP-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc 1586; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1587; CGP-NEXT: v_cndmask_b32_e32 v11, v15, v11, vcc 1588; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 1589; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v13, vcc 1590; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 1591; CGP-NEXT: v_cndmask_b32_e64 v9, v9, v16, s[4:5] 1592; CGP-NEXT: 
v_cndmask_b32_e32 v3, v3, v14, vcc 1593; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 1594; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 1595; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] 1596; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 1597; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[4:5] 1598; CGP-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5] 1599; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc 1600; CGP-NEXT: s_setpc_b64 s[30:31] 1601 %result = urem <2 x i64> %num, <i64 4096, i64 4096> 1602 ret <2 x i64> %result 1603} 1604 1605define i64 @v_urem_i64_oddk_denom(i64 %num) { 1606; CHECK-LABEL: v_urem_i64_oddk_denom: 1607; CHECK: ; %bb.0: 1608; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1609; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb 1610; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0 1611; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 1612; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 1613; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v2 1614; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v3 1615; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 1616; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 1617; CHECK-NEXT: v_trunc_f32_e32 v3, v3 1618; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 1619; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 1620; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 1621; CHECK-NEXT: v_mul_lo_u32 v4, s7, v3 1622; CHECK-NEXT: v_mul_lo_u32 v5, s7, v2 1623; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 1624; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 1625; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 1626; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 1627; CHECK-NEXT: v_mul_hi_u32 v8, v2, v5 1628; CHECK-NEXT: v_mul_hi_u32 v5, v3, v5 1629; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 1630; CHECK-NEXT: v_mul_lo_u32 v7, v2, v4 1631; CHECK-NEXT: v_mul_lo_u32 v9, v3, v4 1632; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 1633; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4 1634; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 1635; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1636; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 1637; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1638; 
CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 1639; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1640; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 1641; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1642; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 1643; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 1644; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 1645; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1646; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 1647; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 1648; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 1649; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v3, v4, vcc 1650; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v4 1651; CHECK-NEXT: v_mul_lo_u32 v4, s7, v2 1652; CHECK-NEXT: v_mul_lo_u32 v6, -1, v2 1653; CHECK-NEXT: v_mul_hi_u32 v7, s7, v2 1654; CHECK-NEXT: v_mul_lo_u32 v8, s7, v5 1655; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 1656; CHECK-NEXT: v_mul_hi_u32 v10, v2, v4 1657; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 1658; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 1659; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 1660; CHECK-NEXT: v_mul_lo_u32 v7, v2, v6 1661; CHECK-NEXT: v_mul_lo_u32 v8, v5, v6 1662; CHECK-NEXT: v_mul_hi_u32 v11, v2, v6 1663; CHECK-NEXT: v_mul_hi_u32 v5, v5, v6 1664; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v9, v7 1665; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] 1666; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 1667; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 1668; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 1669; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 1670; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v11 1671; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 1672; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 1673; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v9 1674; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 1675; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 1676; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 1677; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 1678; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc 1679; 
CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 1680; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 1681; CHECK-NEXT: v_mul_lo_u32 v4, v1, v2 1682; CHECK-NEXT: v_mul_hi_u32 v5, v0, v2 1683; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 1684; CHECK-NEXT: v_mul_lo_u32 v6, v0, v3 1685; CHECK-NEXT: v_mul_lo_u32 v7, v1, v3 1686; CHECK-NEXT: v_mul_hi_u32 v8, v0, v3 1687; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 1688; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 1689; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1690; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 1691; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1692; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 1693; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1694; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8 1695; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1696; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 1697; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 1698; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 1699; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1700; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 1701; CHECK-NEXT: v_mul_lo_u32 v5, s6, v2 1702; CHECK-NEXT: v_mul_lo_u32 v6, 0, v2 1703; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 1704; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 1705; CHECK-NEXT: v_mul_lo_u32 v3, s6, v3 1706; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v3 1707; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 1708; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 1709; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v1, v2, vcc 1710; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 1711; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 1712; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] 1713; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v3 1714; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 1715; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1716; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 1717; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 1718; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s6, v0 1719; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1720; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 
s6, v4 1721; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 1722; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 1723; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc 1724; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v4 1725; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc 1726; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1727; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc 1728; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 1729; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc 1730; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc 1731; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 1732; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1733; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 1734; CHECK-NEXT: s_setpc_b64 s[30:31] 1735 %result = urem i64 %num, 1235195 1736 ret i64 %result 1737} 1738 1739define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { 1740; GISEL-LABEL: v_urem_v2i64_oddk_denom: 1741; GISEL: ; %bb.0: 1742; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1743; GISEL-NEXT: s_mov_b32 s10, 0x12d8fb 1744; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s10 1745; GISEL-NEXT: s_sub_u32 s8, 0, s10 1746; GISEL-NEXT: s_cselect_b32 s4, 1, 0 1747; GISEL-NEXT: v_cvt_f32_u32_e32 v5, 0 1748; GISEL-NEXT: v_mov_b32_e32 v6, v4 1749; GISEL-NEXT: s_and_b32 s4, s4, 1 1750; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 1751; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 1752; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 1753; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6 1754; GISEL-NEXT: s_cmp_lg_u32 s4, 0 1755; GISEL-NEXT: s_subb_u32 s9, 0, 0 1756; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 1757; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 1758; GISEL-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 1759; GISEL-NEXT: s_sub_u32 s11, 0, s10 1760; GISEL-NEXT: s_cselect_b32 s4, 1, 0 1761; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 1762; GISEL-NEXT: v_trunc_f32_e32 v6, v6 1763; GISEL-NEXT: s_and_b32 s4, s4, 1 1764; GISEL-NEXT: v_trunc_f32_e32 v7, v7 1765; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 1766; GISEL-NEXT: 
v_cvt_u32_f32_e32 v6, v6 1767; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 1768; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 1769; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 1770; GISEL-NEXT: s_cmp_lg_u32 s4, 0 1771; GISEL-NEXT: s_subb_u32 s6, 0, 0 1772; GISEL-NEXT: v_mul_lo_u32 v8, s11, v6 1773; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 1774; GISEL-NEXT: v_mul_lo_u32 v9, s8, v7 1775; GISEL-NEXT: v_mul_lo_u32 v10, s11, v4 1776; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 1777; GISEL-NEXT: v_mul_hi_u32 v12, s11, v4 1778; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5 1779; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 1780; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 1781; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 1782; GISEL-NEXT: v_mul_lo_u32 v11, v6, v10 1783; GISEL-NEXT: v_mul_hi_u32 v16, v4, v10 1784; GISEL-NEXT: v_mul_hi_u32 v10, v6, v10 1785; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9 1786; GISEL-NEXT: v_mul_lo_u32 v14, v7, v13 1787; GISEL-NEXT: v_mul_hi_u32 v17, v5, v13 1788; GISEL-NEXT: v_mul_hi_u32 v13, v7, v13 1789; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 1790; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 1791; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 1792; GISEL-NEXT: v_mul_lo_u32 v15, v6, v8 1793; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 1794; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 1795; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9 1796; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v19 1797; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 1798; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 1799; GISEL-NEXT: v_mul_lo_u32 v14, v7, v9 1800; GISEL-NEXT: v_mul_hi_u32 v17, v5, v9 1801; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 1802; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 1803; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 1804; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 1805; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 1806; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 1807; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 1808; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 1809; GISEL-NEXT: v_cndmask_b32_e64 v11, 
0, 1, s[4:5] 1810; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 1811; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] 1812; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 1813; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 1814; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 1815; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1816; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v16 1817; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v18 1818; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 1819; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 1820; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1821; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 1822; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1823; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 1824; GISEL-NEXT: v_add_i32_e32 v12, vcc, v14, v15 1825; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 1826; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 1827; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 1828; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc 1829; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 1830; GISEL-NEXT: v_mul_lo_u32 v8, s11, v4 1831; GISEL-NEXT: v_mul_lo_u32 v11, s6, v4 1832; GISEL-NEXT: v_mul_hi_u32 v12, s11, v4 1833; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 1834; GISEL-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] 1835; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 1836; GISEL-NEXT: v_mul_lo_u32 v9, s8, v5 1837; GISEL-NEXT: v_mul_lo_u32 v14, s9, v5 1838; GISEL-NEXT: v_mul_hi_u32 v15, s8, v5 1839; GISEL-NEXT: v_mul_lo_u32 v16, s11, v10 1840; GISEL-NEXT: v_mul_lo_u32 v17, v10, v8 1841; GISEL-NEXT: v_mul_hi_u32 v18, v4, v8 1842; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 1843; GISEL-NEXT: v_mul_lo_u32 v19, s8, v13 1844; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 1845; GISEL-NEXT: v_mul_lo_u32 v16, v13, v9 1846; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19 1847; GISEL-NEXT: v_mul_hi_u32 v19, v5, v9 1848; GISEL-NEXT: v_mul_hi_u32 v9, v13, v9 1849; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 1850; GISEL-NEXT: 
v_add_i32_e64 v12, s[6:7], v14, v15 1851; GISEL-NEXT: v_mul_lo_u32 v14, v4, v11 1852; GISEL-NEXT: v_mul_lo_u32 v15, v5, v12 1853; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15 1854; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] 1855; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 1856; GISEL-NEXT: v_mul_lo_u32 v15, v10, v11 1857; GISEL-NEXT: v_mul_hi_u32 v19, v4, v11 1858; GISEL-NEXT: v_mul_hi_u32 v10, v10, v11 1859; GISEL-NEXT: v_mul_lo_u32 v11, v13, v12 1860; GISEL-NEXT: v_mul_hi_u32 v13, v13, v12 1861; GISEL-NEXT: v_mul_hi_u32 v12, v5, v12 1862; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 1863; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] 1864; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8 1865; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] 1866; GISEL-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9 1867; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9] 1868; GISEL-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18 1869; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] 1870; GISEL-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19 1871; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 1872; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 1873; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 1874; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] 1875; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14 1876; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 1877; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19 1878; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 1879; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14 1880; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] 1881; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 1882; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] 1883; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12 1884; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14 1885; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 1886; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11 1887; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc 1888; GISEL-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, 
s[4:5] 1889; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 1890; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc 1891; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 1892; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 1893; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 1894; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 1895; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc 1896; GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 1897; GISEL-NEXT: v_mul_hi_u32 v11, v0, v5 1898; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 1899; GISEL-NEXT: v_mul_lo_u32 v12, v2, v6 1900; GISEL-NEXT: v_mul_lo_u32 v13, v3, v6 1901; GISEL-NEXT: v_mul_hi_u32 v14, v2, v6 1902; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6 1903; GISEL-NEXT: v_mul_lo_u32 v15, v0, v7 1904; GISEL-NEXT: v_mul_lo_u32 v16, v1, v7 1905; GISEL-NEXT: v_mul_hi_u32 v17, v0, v7 1906; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 1907; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 1908; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 1909; GISEL-NEXT: v_add_i32_e32 v4, vcc, v13, v4 1910; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 1911; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 1912; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 1913; GISEL-NEXT: v_add_i32_e32 v5, vcc, v16, v5 1914; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 1915; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 1916; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1917; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14 1918; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1919; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 1920; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1921; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17 1922; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1923; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 1924; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 1925; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 1926; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 1927; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 1928; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1929; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 1930; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 
1931; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 1932; GISEL-NEXT: v_mul_lo_u32 v10, s10, v4 1933; GISEL-NEXT: v_mul_lo_u32 v12, 0, v4 1934; GISEL-NEXT: v_mul_hi_u32 v4, s10, v4 1935; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 1936; GISEL-NEXT: v_mul_lo_u32 v11, s10, v5 1937; GISEL-NEXT: v_mul_lo_u32 v13, 0, v5 1938; GISEL-NEXT: v_mul_hi_u32 v5, s10, v5 1939; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 1940; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 1941; GISEL-NEXT: v_mul_lo_u32 v6, s10, v6 1942; GISEL-NEXT: v_mul_lo_u32 v7, s10, v7 1943; GISEL-NEXT: v_add_i32_e32 v6, vcc, v12, v6 1944; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 1945; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4 1946; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 1947; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 1948; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc 1949; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 1950; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v2 1951; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 1952; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11 1953; GISEL-NEXT: v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5] 1954; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v5 1955; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v0 1956; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7] 1957; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v6 1958; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[6:7] 1959; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 1960; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v7 1961; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc 1962; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] 1963; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 1964; GISEL-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc 1965; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, s10, v2 1966; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 1967; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v8 1968; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc 1969; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 1970; GISEL-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc 1971; GISEL-NEXT: 
v_subrev_i32_e32 v9, vcc, s10, v0 1972; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 1973; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v9 1974; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 1975; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 1976; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc 1977; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, s10, v8 1978; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc 1979; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 1980; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc 1981; GISEL-NEXT: v_subrev_i32_e32 v16, vcc, s10, v9 1982; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v1, vcc 1983; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 1984; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc 1985; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1986; GISEL-NEXT: v_cndmask_b32_e32 v11, v15, v11, vcc 1987; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 1988; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v13, vcc 1989; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 1990; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v16, s[4:5] 1991; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc 1992; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 1993; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 1994; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] 1995; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 1996; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[4:5] 1997; GISEL-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5] 1998; GISEL-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc 1999; GISEL-NEXT: s_setpc_b64 s[30:31] 2000; 2001; CGP-LABEL: v_urem_v2i64_oddk_denom: 2002; CGP: ; %bb.0: 2003; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2004; CGP-NEXT: s_mov_b32 s10, 0x12d8fb 2005; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0 2006; CGP-NEXT: v_cvt_f32_u32_e32 v5, s10 2007; CGP-NEXT: s_mov_b32 s8, 0xffed2705 2008; CGP-NEXT: v_mov_b32_e32 v6, v5 2009; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v4 2010; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v4 2011; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v5 2012; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 2013; CGP-NEXT: 
v_mul_f32_e32 v4, 0x5f7ffffc, v4 2014; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 2015; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 2016; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 2017; CGP-NEXT: v_trunc_f32_e32 v6, v6 2018; CGP-NEXT: v_trunc_f32_e32 v7, v7 2019; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 2020; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 2021; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 2022; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 2023; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 2024; CGP-NEXT: v_mul_lo_u32 v8, s8, v6 2025; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 2026; CGP-NEXT: v_mul_lo_u32 v9, s8, v7 2027; CGP-NEXT: v_mul_lo_u32 v10, s8, v4 2028; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 2029; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 2030; CGP-NEXT: v_mul_lo_u32 v13, s8, v5 2031; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 2032; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 2033; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8 2034; CGP-NEXT: v_mul_lo_u32 v11, v6, v10 2035; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 2036; CGP-NEXT: v_mul_hi_u32 v10, v6, v10 2037; CGP-NEXT: v_add_i32_e32 v9, vcc, v14, v9 2038; CGP-NEXT: v_mul_lo_u32 v14, v7, v13 2039; CGP-NEXT: v_mul_hi_u32 v17, v5, v13 2040; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 2041; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 2042; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 2043; CGP-NEXT: v_mul_lo_u32 v12, v4, v8 2044; CGP-NEXT: v_mul_lo_u32 v15, v6, v8 2045; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 2046; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 2047; CGP-NEXT: v_mul_lo_u32 v19, v5, v9 2048; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 2049; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 2050; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 2051; CGP-NEXT: v_mul_lo_u32 v14, v7, v9 2052; CGP-NEXT: v_mul_hi_u32 v17, v5, v9 2053; CGP-NEXT: v_mul_hi_u32 v9, v7, v9 2054; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v12 2055; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 2056; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v15, v10 2057; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 2058; CGP-NEXT: v_add_i32_e64 
v13, s[4:5], v14, v13 2059; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 2060; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 2061; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 2062; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v18 2063; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] 2064; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 2065; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 2066; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 2067; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 2068; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v16 2069; CGP-NEXT: v_add_i32_e32 v15, vcc, v19, v18 2070; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 2071; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 2072; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2073; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 2074; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2075; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 2076; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v15 2077; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 2078; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 2079; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10 2080; CGP-NEXT: v_addc_u32_e64 v10, s[4:5], v6, v8, vcc 2081; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 2082; CGP-NEXT: v_mul_lo_u32 v8, s8, v4 2083; CGP-NEXT: v_mul_lo_u32 v11, -1, v4 2084; CGP-NEXT: v_mul_hi_u32 v12, s8, v4 2085; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v13 2086; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v7, v9, s[4:5] 2087; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 2088; CGP-NEXT: v_mul_lo_u32 v9, s8, v5 2089; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 2090; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 2091; CGP-NEXT: v_mul_lo_u32 v16, s8, v10 2092; CGP-NEXT: v_mul_lo_u32 v17, v10, v8 2093; CGP-NEXT: v_mul_hi_u32 v18, v4, v8 2094; CGP-NEXT: v_mul_hi_u32 v8, v10, v8 2095; CGP-NEXT: v_mul_lo_u32 v19, s8, v13 2096; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 2097; CGP-NEXT: v_mul_lo_u32 v16, v13, v9 2098; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v14, v19 2099; CGP-NEXT: v_mul_hi_u32 v19, v5, v9 2100; CGP-NEXT: v_mul_hi_u32 v9, v13, 
v9 2101; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 2102; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v14, v15 2103; CGP-NEXT: v_mul_lo_u32 v14, v4, v11 2104; CGP-NEXT: v_mul_lo_u32 v15, v5, v12 2105; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v15 2106; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] 2107; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v15, v19 2108; CGP-NEXT: v_mul_lo_u32 v15, v10, v11 2109; CGP-NEXT: v_mul_hi_u32 v19, v4, v11 2110; CGP-NEXT: v_mul_hi_u32 v10, v10, v11 2111; CGP-NEXT: v_mul_lo_u32 v11, v13, v12 2112; CGP-NEXT: v_mul_hi_u32 v13, v13, v12 2113; CGP-NEXT: v_mul_hi_u32 v12, v5, v12 2114; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v17, v14 2115; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] 2116; CGP-NEXT: v_add_i32_e64 v8, s[8:9], v15, v8 2117; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] 2118; CGP-NEXT: v_add_i32_e64 v9, s[8:9], v11, v9 2119; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[8:9] 2120; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v18 2121; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] 2122; CGP-NEXT: v_add_i32_e64 v8, s[8:9], v8, v19 2123; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 2124; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 2125; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 2126; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] 2127; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v17, v14 2128; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 2129; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v19 2130; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v12 2131; CGP-NEXT: v_add_i32_e64 v8, s[6:7], v8, v14 2132; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] 2133; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 2134; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] 2135; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v15, v12 2136; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v14 2137; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v12 2138; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v13, v11 2139; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc 2140; CGP-NEXT: v_addc_u32_e64 v7, vcc, v7, v11, s[4:5] 
2141; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 2142; CGP-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc 2143; CGP-NEXT: v_mul_lo_u32 v8, v3, v4 2144; CGP-NEXT: v_mul_hi_u32 v10, v2, v4 2145; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 2146; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 2147; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc 2148; CGP-NEXT: v_mul_lo_u32 v9, v1, v5 2149; CGP-NEXT: v_mul_hi_u32 v11, v0, v5 2150; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 2151; CGP-NEXT: v_mul_lo_u32 v12, v2, v6 2152; CGP-NEXT: v_mul_lo_u32 v13, v3, v6 2153; CGP-NEXT: v_mul_hi_u32 v14, v2, v6 2154; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 2155; CGP-NEXT: v_mul_lo_u32 v15, v0, v7 2156; CGP-NEXT: v_mul_lo_u32 v16, v1, v7 2157; CGP-NEXT: v_mul_hi_u32 v17, v0, v7 2158; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 2159; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12 2160; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2161; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4 2162; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2163; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 2164; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2165; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5 2166; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 2167; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 2168; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2169; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14 2170; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2171; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 2172; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2173; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17 2174; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2175; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8 2176; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 2177; CGP-NEXT: v_add_i32_e32 v9, vcc, v15, v9 2178; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 2179; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8 2180; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2181; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 2182; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2183; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 2184; CGP-NEXT: v_mul_lo_u32 v10, s10, v4 
2185; CGP-NEXT: v_mul_lo_u32 v12, 0, v4 2186; CGP-NEXT: v_mul_hi_u32 v4, s10, v4 2187; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 2188; CGP-NEXT: v_mul_lo_u32 v11, s10, v5 2189; CGP-NEXT: v_mul_lo_u32 v13, 0, v5 2190; CGP-NEXT: v_mul_hi_u32 v5, s10, v5 2191; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 2192; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 2193; CGP-NEXT: v_mul_lo_u32 v6, s10, v6 2194; CGP-NEXT: v_mul_lo_u32 v7, s10, v7 2195; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6 2196; CGP-NEXT: v_add_i32_e32 v7, vcc, v13, v7 2197; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 2198; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 2199; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 2200; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc 2201; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 2202; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v2 2203; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 2204; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11 2205; CGP-NEXT: v_subb_u32_e64 v7, s[6:7], v1, v5, s[4:5] 2206; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v5 2207; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v0 2208; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7] 2209; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v6 2210; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[6:7] 2211; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 2212; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v7 2213; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc 2214; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] 2215; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 2216; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc 2217; CGP-NEXT: v_subrev_i32_e32 v8, vcc, s10, v2 2218; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 2219; CGP-NEXT: v_cmp_le_u32_e32 vcc, s10, v8 2220; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc 2221; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 2222; CGP-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc 2223; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s10, v0 2224; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 2225; CGP-NEXT: v_cmp_le_u32_e32 vcc, s10, v9 2226; CGP-NEXT: v_cndmask_b32_e64 
v11, 0, -1, vcc 2227; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 2228; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc 2229; CGP-NEXT: v_subrev_i32_e32 v13, vcc, s10, v8 2230; CGP-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc 2231; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 2232; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc 2233; CGP-NEXT: v_subrev_i32_e32 v16, vcc, s10, v9 2234; CGP-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v1, vcc 2235; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 2236; CGP-NEXT: v_cndmask_b32_e32 v10, v12, v10, vcc 2237; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 2238; CGP-NEXT: v_cndmask_b32_e32 v11, v15, v11, vcc 2239; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 2240; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v13, vcc 2241; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 2242; CGP-NEXT: v_cndmask_b32_e64 v9, v9, v16, s[4:5] 2243; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc 2244; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 2245; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 2246; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5] 2247; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 2248; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[4:5] 2249; CGP-NEXT: v_cndmask_b32_e64 v1, v7, v1, s[4:5] 2250; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc 2251; CGP-NEXT: s_setpc_b64 s[30:31] 2252 %result = urem <2 x i64> %num, <i64 1235195, i64 1235195> 2253 ret <2 x i64> %result 2254} 2255
; Unsigned 64-bit remainder where the divisor is computed at run time as
; (4096 << %y). The expected code is the generic expansion: %bb.0 ORs the high
; dwords of %x and the shifted divisor and tests the result against zero;
; %bb.1 is the 64-bit path taken when that OR is non-zero, and %bb.3 is the
; 32-bit path (high result dword set to 0) taken otherwise.
; The assertions below use the prefix shared by both RUN lines, so the
; global-isel and codegen-prepare configurations are expected to emit the same
; code for this function.
2256define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) { 2257; CHECK-LABEL: v_urem_i64_pow2_shl_denom: 2258; CHECK: ; %bb.0: 2259; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2260; CHECK-NEXT: s_movk_i32 s4, 0x1000 2261; CHECK-NEXT: s_mov_b32 s5, 0 2262; CHECK-NEXT: v_mov_b32_e32 v6, 0 2263; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2 2264; CHECK-NEXT: v_or_b32_e32 v7, v1, v5 2265; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] 2266; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 2267; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] 2268; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 2269; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 2270; CHECK-NEXT: 
s_xor_b64 s[6:7], exec, s[6:7] 2271; CHECK-NEXT: s_cbranch_execz BB7_2 2272; CHECK-NEXT: ; %bb.1: 2273; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v4 2274; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v5 2275; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 2276; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v5, vcc 2277; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 2278; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 2279; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 2280; CHECK-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 2281; CHECK-NEXT: v_trunc_f32_e32 v3, v3 2282; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 2283; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 2284; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 2285; CHECK-NEXT: v_mul_lo_u32 v8, v6, v3 2286; CHECK-NEXT: v_mul_lo_u32 v9, v6, v2 2287; CHECK-NEXT: v_mul_lo_u32 v10, v7, v2 2288; CHECK-NEXT: v_mul_hi_u32 v11, v6, v2 2289; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 2290; CHECK-NEXT: v_mul_lo_u32 v10, v3, v9 2291; CHECK-NEXT: v_mul_hi_u32 v12, v2, v9 2292; CHECK-NEXT: v_mul_hi_u32 v9, v3, v9 2293; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 2294; CHECK-NEXT: v_mul_lo_u32 v11, v2, v8 2295; CHECK-NEXT: v_mul_lo_u32 v13, v3, v8 2296; CHECK-NEXT: v_mul_hi_u32 v14, v2, v8 2297; CHECK-NEXT: v_mul_hi_u32 v8, v3, v8 2298; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11 2299; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2300; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9 2301; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2302; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 2303; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2304; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14 2305; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2306; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 2307; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12 2308; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 2309; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2310; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10 2311; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 2312; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9 2313; CHECK-NEXT: 
v_addc_u32_e64 v9, s[4:5], v3, v8, vcc 2314; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v3, v8 2315; CHECK-NEXT: v_mul_lo_u32 v8, v6, v2 2316; CHECK-NEXT: v_mul_lo_u32 v7, v7, v2 2317; CHECK-NEXT: v_mul_hi_u32 v10, v6, v2 2318; CHECK-NEXT: v_mul_lo_u32 v6, v6, v9 2319; CHECK-NEXT: v_mul_lo_u32 v11, v9, v8 2320; CHECK-NEXT: v_mul_hi_u32 v12, v2, v8 2321; CHECK-NEXT: v_mul_hi_u32 v8, v9, v8 2322; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 2323; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 2324; CHECK-NEXT: v_mul_lo_u32 v7, v2, v6 2325; CHECK-NEXT: v_mul_lo_u32 v10, v9, v6 2326; CHECK-NEXT: v_mul_hi_u32 v13, v2, v6 2327; CHECK-NEXT: v_mul_hi_u32 v6, v9, v6 2328; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 2329; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 2330; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v10, v8 2331; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 2332; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v12 2333; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] 2334; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v13 2335; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 2336; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v9, v7 2337; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v11 2338; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 2339; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] 2340; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 2341; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 2342; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc 2343; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 2344; CHECK-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2345; CHECK-NEXT: v_mul_lo_u32 v6, v1, v2 2346; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 2347; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 2348; CHECK-NEXT: v_mul_lo_u32 v8, v0, v3 2349; CHECK-NEXT: v_mul_lo_u32 v9, v1, v3 2350; CHECK-NEXT: v_mul_hi_u32 v10, v0, v3 2351; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 2352; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 2353; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2354; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 2355; 
CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2356; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 2357; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2358; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 2359; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 2360; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 2361; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 2362; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 2363; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2364; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 2365; CHECK-NEXT: v_mul_lo_u32 v7, v4, v2 2366; CHECK-NEXT: v_mul_lo_u32 v8, v5, v2 2367; CHECK-NEXT: v_mul_hi_u32 v2, v4, v2 2368; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 2369; CHECK-NEXT: v_mul_lo_u32 v3, v4, v3 2370; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 2371; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 2372; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v7 2373; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v2, vcc 2374; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 2375; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v4 2376; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] 2377; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v5 2378; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] 2379; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc 2380; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v6, v5 2381; CHECK-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc 2382; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v3, v4 2383; CHECK-NEXT: v_subbrev_u32_e64 v8, s[4:5], 0, v1, vcc 2384; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v4 2385; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 2386; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc 2387; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 2388; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc 2389; CHECK-NEXT: v_sub_i32_e32 v11, vcc, v7, v4 2390; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 2391; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v8, v5 2392; CHECK-NEXT: v_cndmask_b32_e32 v5, v10, v9, vcc 2393; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 2394; CHECK-NEXT: v_cndmask_b32_e32 v5, v7, v11, vcc 2395; CHECK-NEXT: 
v_cndmask_b32_e32 v1, v8, v1, vcc 2396; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 2397; CHECK-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc 2398; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v1, vcc 2399; CHECK-NEXT: BB7_2: ; %Flow 2400; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] 2401; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5] 2402; CHECK-NEXT: s_cbranch_execz BB7_4 2403; CHECK-NEXT: ; %bb.3: 2404; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v4 2405; CHECK-NEXT: v_sub_i32_e32 v2, vcc, 0, v4 2406; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 2407; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 2408; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 2409; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1 2410; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 2411; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 2412; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 2413; CHECK-NEXT: v_mul_lo_u32 v1, v1, v4 2414; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 2415; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 2416; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 2417; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2418; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 2419; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 2420; CHECK-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc 2421; CHECK-NEXT: v_mov_b32_e32 v3, 0 2422; CHECK-NEXT: BB7_4: 2423; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] 2424; CHECK-NEXT: v_mov_b32_e32 v0, v2 2425; CHECK-NEXT: v_mov_b32_e32 v1, v3 2426; CHECK-NEXT: s_setpc_b64 s[30:31] 2427 %shl.y = shl i64 4096, %y 2428 %r = urem i64 %x, %shl.y 2429 ret i64 %r 2430} 2431 2432define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { 2433; GISEL-LABEL: v_urem_v2i64_pow2_shl_denom: 2434; GISEL: ; %bb.0: 2435; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2436; GISEL-NEXT: s_movk_i32 s4, 0x1000 2437; GISEL-NEXT: s_mov_b32 s5, 0 2438; GISEL-NEXT: v_lshl_b64 v[4:5], s[4:5], v4 2439; GISEL-NEXT: v_lshl_b64 v[6:7], s[4:5], v6 2440; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 2441; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5 2442; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 
2443; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 2444; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 2445; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 2446; GISEL-NEXT: v_trunc_f32_e32 v9, v9 2447; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9 2448; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 2449; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 2450; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v4 2451; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc 2452; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 2453; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8 2454; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 2455; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 2456; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 2457; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 2458; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 2459; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13 2460; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12 2461; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 2462; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2463; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 2464; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2465; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 2466; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 2467; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 2468; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 2469; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 2470; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2471; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 2472; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 2473; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 2474; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 2475; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2476; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 2477; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 2478; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 2479; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 2480; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v13, vcc 2481; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 2482; GISEL-NEXT: v_mul_lo_u32 v13, v10, v8 2483; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8 2484; GISEL-NEXT: 
v_mul_lo_u32 v14, v10, v12 2485; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8 2486; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 2487; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 2488; GISEL-NEXT: v_mul_lo_u32 v11, v12, v13 2489; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 2490; GISEL-NEXT: v_mul_hi_u32 v15, v8, v13 2491; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 2492; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 2493; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 2494; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 2495; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 2496; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 2497; GISEL-NEXT: v_mul_hi_u32 v13, v12, v13 2498; GISEL-NEXT: v_mul_hi_u32 v15, v8, v10 2499; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 2500; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 2501; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 2502; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 2503; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 2504; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 2505; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 2506; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 2507; GISEL-NEXT: v_mul_hi_u32 v10, v12, v10 2508; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 2509; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 2510; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc 2511; GISEL-NEXT: v_addc_u32_e64 v9, vcc, 0, v9, s[4:5] 2512; GISEL-NEXT: v_mul_lo_u32 v10, v1, v8 2513; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9 2514; GISEL-NEXT: v_mul_hi_u32 v12, v0, v8 2515; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 2516; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2517; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 2518; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2519; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 2520; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 2521; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8 2522; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9 2523; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 2524; GISEL-NEXT: 
v_cndmask_b32_e64 v11, 0, 1, vcc 2525; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 2526; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2527; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 2528; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 2529; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2530; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 2531; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 2532; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 2533; GISEL-NEXT: v_mul_lo_u32 v10, v4, v8 2534; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8 2535; GISEL-NEXT: v_mul_lo_u32 v9, v4, v9 2536; GISEL-NEXT: v_mul_hi_u32 v8, v4, v8 2537; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 2538; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 2539; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 2540; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc 2541; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 2542; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v5 2543; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 2544; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 2545; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] 2546; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v5 2547; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] 2548; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], v0, v4 2549; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc 2550; GISEL-NEXT: v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5] 2551; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5 2552; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc 2553; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v4 2554; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc 2555; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v11, v5 2556; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc 2557; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v10, v4 2558; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5] 2559; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 2560; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 2561; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc 2562; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc 2563; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 
2564; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2565; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc 2566; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v6 2567; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v7 2568; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 2569; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 2570; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 2571; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 2572; GISEL-NEXT: v_trunc_f32_e32 v5, v5 2573; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 2574; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 2575; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 2576; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 2577; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc 2578; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4 2579; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 2580; GISEL-NEXT: v_mul_lo_u32 v12, v8, v5 2581; GISEL-NEXT: v_mul_hi_u32 v13, v8, v4 2582; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 2583; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 2584; GISEL-NEXT: v_mul_lo_u32 v12, v5, v10 2585; GISEL-NEXT: v_mul_lo_u32 v13, v4, v11 2586; GISEL-NEXT: v_mul_hi_u32 v14, v4, v10 2587; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 2588; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2589; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 2590; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2591; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 2592; GISEL-NEXT: v_mul_lo_u32 v13, v5, v11 2593; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10 2594; GISEL-NEXT: v_mul_hi_u32 v14, v4, v11 2595; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 2596; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2597; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 2598; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2599; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 2600; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 2601; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2602; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 2603; GISEL-NEXT: v_mul_hi_u32 v11, v5, v11 2604; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 2605; GISEL-NEXT: v_add_i32_e32 v4, 
vcc, v4, v10 2606; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v5, v11, vcc 2607; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v11 2608; GISEL-NEXT: v_mul_lo_u32 v11, v8, v4 2609; GISEL-NEXT: v_mul_lo_u32 v9, v9, v4 2610; GISEL-NEXT: v_mul_lo_u32 v12, v8, v10 2611; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4 2612; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 2613; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 2614; GISEL-NEXT: v_mul_lo_u32 v9, v10, v11 2615; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8 2616; GISEL-NEXT: v_mul_hi_u32 v13, v4, v11 2617; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 2618; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 2619; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 2620; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] 2621; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v12, v9 2622; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 2623; GISEL-NEXT: v_mul_hi_u32 v11, v10, v11 2624; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8 2625; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 2626; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 2627; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 2628; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 2629; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13 2630; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v11, v9 2631; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 2632; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 2633; GISEL-NEXT: v_mul_hi_u32 v8, v10, v8 2634; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 2635; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v4, v9 2636; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc 2637; GISEL-NEXT: v_addc_u32_e64 v5, vcc, 0, v5, s[4:5] 2638; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4 2639; GISEL-NEXT: v_mul_lo_u32 v9, v2, v5 2640; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4 2641; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 2642; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2643; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 2644; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2645; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 2646; GISEL-NEXT: 
v_mul_lo_u32 v9, v3, v5 2647; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 2648; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5 2649; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4 2650; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 2651; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10 2652; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2653; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 2654; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8 2655; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 2656; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8 2657; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5 2658; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8 2659; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4 2660; GISEL-NEXT: v_mul_lo_u32 v9, v7, v4 2661; GISEL-NEXT: v_mul_lo_u32 v5, v6, v5 2662; GISEL-NEXT: v_mul_hi_u32 v4, v6, v4 2663; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5 2664; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4 2665; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 2666; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc 2667; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 2668; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v7 2669; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 2670; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 2671; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 2672; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v7 2673; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5] 2674; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v6 2675; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc 2676; GISEL-NEXT: v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5] 2677; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v7 2678; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc 2679; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v6 2680; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 2681; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v7 2682; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc 2683; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v8, v6 2684; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5] 2685; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 2686; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 
2687; GISEL-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc 2688; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc 2689; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 2690; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 2691; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc 2692; GISEL-NEXT: s_setpc_b64 s[30:31] 2693; 2694; CGP-LABEL: v_urem_v2i64_pow2_shl_denom: 2695; CGP: ; %bb.0: 2696; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2697; CGP-NEXT: v_mov_b32_e32 v5, v0 2698; CGP-NEXT: v_mov_b32_e32 v7, v1 2699; CGP-NEXT: s_movk_i32 s4, 0x1000 2700; CGP-NEXT: s_mov_b32 s5, 0 2701; CGP-NEXT: v_mov_b32_e32 v0, 0 2702; CGP-NEXT: v_lshl_b64 v[10:11], s[4:5], v4 2703; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6 2704; CGP-NEXT: v_or_b32_e32 v1, v7, v11 2705; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] 2706; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 2707; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] 2708; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 2709; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 2710; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] 2711; CGP-NEXT: s_cbranch_execz BB8_2 2712; CGP-NEXT: ; %bb.1: 2713; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10 2714; CGP-NEXT: v_cvt_f32_u32_e32 v1, v11 2715; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v10 2716; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v11, vcc 2717; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 2718; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 2719; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 2720; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 2721; CGP-NEXT: v_trunc_f32_e32 v1, v1 2722; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 2723; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1 2724; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 2725; CGP-NEXT: v_mul_lo_u32 v12, v4, v1 2726; CGP-NEXT: v_mul_lo_u32 v13, v4, v0 2727; CGP-NEXT: v_mul_lo_u32 v14, v6, v0 2728; CGP-NEXT: v_mul_hi_u32 v15, v4, v0 2729; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 2730; CGP-NEXT: v_mul_lo_u32 v14, v1, v13 2731; CGP-NEXT: v_mul_hi_u32 v16, v0, v13 2732; CGP-NEXT: v_mul_hi_u32 v13, v1, v13 2733; CGP-NEXT: v_add_i32_e32 v12, 
vcc, v12, v15 2734; CGP-NEXT: v_mul_lo_u32 v15, v0, v12 2735; CGP-NEXT: v_mul_lo_u32 v17, v1, v12 2736; CGP-NEXT: v_mul_hi_u32 v18, v0, v12 2737; CGP-NEXT: v_mul_hi_u32 v12, v1, v12 2738; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 2739; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2740; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13 2741; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc 2742; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16 2743; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2744; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 2745; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 2746; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 2747; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16 2748; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 2749; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2750; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14 2751; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 2752; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13 2753; CGP-NEXT: v_addc_u32_e64 v13, s[4:5], v1, v12, vcc 2754; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v1, v12 2755; CGP-NEXT: v_mul_lo_u32 v12, v4, v0 2756; CGP-NEXT: v_mul_lo_u32 v6, v6, v0 2757; CGP-NEXT: v_mul_hi_u32 v14, v4, v0 2758; CGP-NEXT: v_mul_lo_u32 v4, v4, v13 2759; CGP-NEXT: v_mul_lo_u32 v15, v13, v12 2760; CGP-NEXT: v_mul_hi_u32 v16, v0, v12 2761; CGP-NEXT: v_mul_hi_u32 v12, v13, v12 2762; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v6, v4 2763; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v14 2764; CGP-NEXT: v_mul_lo_u32 v6, v0, v4 2765; CGP-NEXT: v_mul_lo_u32 v14, v13, v4 2766; CGP-NEXT: v_mul_hi_u32 v17, v0, v4 2767; CGP-NEXT: v_mul_hi_u32 v4, v13, v4 2768; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v15, v6 2769; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 2770; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 2771; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] 2772; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v16 2773; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 2774; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 2775; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 2776; 
CGP-NEXT: v_add_i32_e64 v6, s[4:5], v13, v6 2777; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v15 2778; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v12, v6 2779; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 2780; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 2781; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v12 2782; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc 2783; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v6 2784; CGP-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2785; CGP-NEXT: v_mul_lo_u32 v4, v7, v0 2786; CGP-NEXT: v_mul_hi_u32 v6, v5, v0 2787; CGP-NEXT: v_mul_hi_u32 v0, v7, v0 2788; CGP-NEXT: v_mul_lo_u32 v12, v5, v1 2789; CGP-NEXT: v_mul_lo_u32 v13, v7, v1 2790; CGP-NEXT: v_mul_hi_u32 v14, v5, v1 2791; CGP-NEXT: v_mul_hi_u32 v1, v7, v1 2792; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 2793; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2794; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0 2795; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2796; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 2797; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2798; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14 2799; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2800; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4 2801; CGP-NEXT: v_add_i32_e32 v6, vcc, v13, v6 2802; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 2803; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2804; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 2805; CGP-NEXT: v_mul_lo_u32 v6, v10, v0 2806; CGP-NEXT: v_mul_lo_u32 v12, v11, v0 2807; CGP-NEXT: v_mul_hi_u32 v0, v10, v0 2808; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 2809; CGP-NEXT: v_mul_lo_u32 v1, v10, v1 2810; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1 2811; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0 2812; CGP-NEXT: v_sub_i32_e32 v1, vcc, v5, v6 2813; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v7, v0, vcc 2814; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v7, v0 2815; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v10 2816; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] 2817; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v11 2818; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, 
s[4:5] 2819; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v11, vcc 2820; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v11 2821; CGP-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc 2822; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v10 2823; CGP-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v0, vcc 2824; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v10 2825; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] 2826; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v11, vcc 2827; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v11 2828; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc 2829; CGP-NEXT: v_sub_i32_e32 v15, vcc, v7, v10 2830; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc 2831; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v11 2832; CGP-NEXT: v_cndmask_b32_e32 v11, v14, v13, vcc 2833; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 2834; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc 2835; CGP-NEXT: v_cndmask_b32_e32 v11, v12, v0, vcc 2836; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 2837; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v7, vcc 2838; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v11, vcc 2839; CGP-NEXT: BB8_2: ; %Flow2 2840; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] 2841; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] 2842; CGP-NEXT: s_cbranch_execz BB8_4 2843; CGP-NEXT: ; %bb.3: 2844; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10 2845; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v10 2846; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 2847; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2848; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 2849; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 2850; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 2851; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 2852; CGP-NEXT: v_mul_hi_u32 v0, v5, v0 2853; CGP-NEXT: v_mul_lo_u32 v0, v0, v10 2854; CGP-NEXT: v_sub_i32_e32 v0, vcc, v5, v0 2855; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v10 2856; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10 2857; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2858; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v10 2859; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10 2860; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2861; CGP-NEXT: v_mov_b32_e32 v1, 0 2862; 
CGP-NEXT: BB8_4: 2863; CGP-NEXT: s_or_b64 exec, exec, s[4:5] 2864; CGP-NEXT: v_or_b32_e32 v5, v3, v9 2865; CGP-NEXT: v_mov_b32_e32 v4, 0 2866; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] 2867; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 2868; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] 2869; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5 2870; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] 2871; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] 2872; CGP-NEXT: s_cbranch_execz BB8_6 2873; CGP-NEXT: ; %bb.5: 2874; CGP-NEXT: v_cvt_f32_u32_e32 v4, v8 2875; CGP-NEXT: v_cvt_f32_u32_e32 v5, v9 2876; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v8 2877; CGP-NEXT: v_subb_u32_e32 v7, vcc, 0, v9, vcc 2878; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 2879; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 2880; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 2881; CGP-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 2882; CGP-NEXT: v_trunc_f32_e32 v5, v5 2883; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 2884; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 2885; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 2886; CGP-NEXT: v_mul_lo_u32 v10, v6, v5 2887; CGP-NEXT: v_mul_lo_u32 v11, v6, v4 2888; CGP-NEXT: v_mul_lo_u32 v12, v7, v4 2889; CGP-NEXT: v_mul_hi_u32 v13, v6, v4 2890; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 2891; CGP-NEXT: v_mul_lo_u32 v12, v5, v11 2892; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 2893; CGP-NEXT: v_mul_hi_u32 v11, v5, v11 2894; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 2895; CGP-NEXT: v_mul_lo_u32 v13, v4, v10 2896; CGP-NEXT: v_mul_lo_u32 v15, v5, v10 2897; CGP-NEXT: v_mul_hi_u32 v16, v4, v10 2898; CGP-NEXT: v_mul_hi_u32 v10, v5, v10 2899; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 2900; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 2901; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 2902; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 2903; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 2904; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2905; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 2906; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc 2907; CGP-NEXT: v_add_i32_e32 
v12, vcc, v13, v12 2908; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14 2909; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 2910; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 2911; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12 2912; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 2913; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 2914; CGP-NEXT: v_addc_u32_e64 v11, s[4:5], v5, v10, vcc 2915; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v5, v10 2916; CGP-NEXT: v_mul_lo_u32 v10, v6, v4 2917; CGP-NEXT: v_mul_lo_u32 v7, v7, v4 2918; CGP-NEXT: v_mul_hi_u32 v12, v6, v4 2919; CGP-NEXT: v_mul_lo_u32 v6, v6, v11 2920; CGP-NEXT: v_mul_lo_u32 v13, v11, v10 2921; CGP-NEXT: v_mul_hi_u32 v14, v4, v10 2922; CGP-NEXT: v_mul_hi_u32 v10, v11, v10 2923; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 2924; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v12 2925; CGP-NEXT: v_mul_lo_u32 v7, v4, v6 2926; CGP-NEXT: v_mul_lo_u32 v12, v11, v6 2927; CGP-NEXT: v_mul_hi_u32 v15, v4, v6 2928; CGP-NEXT: v_mul_hi_u32 v6, v11, v6 2929; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v13, v7 2930; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] 2931; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 2932; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] 2933; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v14 2934; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] 2935; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 2936; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] 2937; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7 2938; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v13 2939; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v10, v7 2940; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] 2941; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 2942; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 2943; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc 2944; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v7 2945; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2946; CGP-NEXT: v_mul_lo_u32 v6, v3, v4 2947; CGP-NEXT: v_mul_hi_u32 v7, v2, v4 2948; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 2949; CGP-NEXT: v_mul_lo_u32 v10, v2, v5 
2950; CGP-NEXT: v_mul_lo_u32 v11, v3, v5 2951; CGP-NEXT: v_mul_hi_u32 v12, v2, v5 2952; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 2953; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 2954; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 2955; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4 2956; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 2957; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 2958; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2959; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12 2960; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 2961; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 2962; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 2963; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 2964; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 2965; CGP-NEXT: v_add_i32_e32 v6, vcc, v7, v6 2966; CGP-NEXT: v_mul_lo_u32 v7, v8, v4 2967; CGP-NEXT: v_mul_lo_u32 v10, v9, v4 2968; CGP-NEXT: v_mul_hi_u32 v4, v8, v4 2969; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v6 2970; CGP-NEXT: v_mul_lo_u32 v5, v8, v5 2971; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 2972; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 2973; CGP-NEXT: v_sub_i32_e32 v5, vcc, v2, v7 2974; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc 2975; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 2976; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v8 2977; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] 2978; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v9 2979; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] 2980; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc 2981; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v9 2982; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc 2983; CGP-NEXT: v_sub_i32_e32 v7, vcc, v5, v8 2984; CGP-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v3, vcc 2985; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v8 2986; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] 2987; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v9, vcc 2988; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v10, v9 2989; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc 2990; CGP-NEXT: v_sub_i32_e32 v13, vcc, v7, v8 2991; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc 2992; 
CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 2993; CGP-NEXT: v_cndmask_b32_e32 v9, v12, v11, vcc 2994; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 2995; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v13, vcc 2996; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc 2997; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 2998; CGP-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc 2999; CGP-NEXT: v_cndmask_b32_e32 v5, v6, v3, vcc 3000; CGP-NEXT: BB8_6: ; %Flow 3001; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] 3002; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] 3003; CGP-NEXT: s_cbranch_execz BB8_8 3004; CGP-NEXT: ; %bb.7: 3005; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8 3006; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v8 3007; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 3008; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 3009; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 3010; CGP-NEXT: v_mul_lo_u32 v4, v4, v3 3011; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 3012; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 3013; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 3014; CGP-NEXT: v_mul_lo_u32 v3, v3, v8 3015; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 3016; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v8 3017; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 3018; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 3019; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v8 3020; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 3021; CGP-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc 3022; CGP-NEXT: v_mov_b32_e32 v5, 0 3023; CGP-NEXT: BB8_8: 3024; CGP-NEXT: s_or_b64 exec, exec, s[4:5] 3025; CGP-NEXT: v_mov_b32_e32 v2, v4 3026; CGP-NEXT: v_mov_b32_e32 v3, v5 3027; CGP-NEXT: s_setpc_b64 s[30:31] 3028 %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y 3029 %r = urem <2 x i64> %x, %shl.y 3030 ret <2 x i64> %r 3031} 3032 3033define i64 @v_urem_i64_24bit(i64 %num, i64 %den) { 3034; GISEL-LABEL: v_urem_i64_24bit: 3035; GISEL: ; %bb.0: 3036; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3037; GISEL-NEXT: s_mov_b32 s4, 0xffffff 3038; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 3039; GISEL-NEXT: v_and_b32_e32 v1, s4, v2 3040; GISEL-NEXT: v_cvt_f32_u32_e32 v2, 
v1 3041; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 3042; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 3043; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 3044; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 3045; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 3046; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 3047; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 3048; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2 3049; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 3050; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 3051; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 3052; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 3053; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3054; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 3055; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 3056; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3057; GISEL-NEXT: v_mov_b32_e32 v1, 0 3058; GISEL-NEXT: s_setpc_b64 s[30:31] 3059; 3060; CGP-LABEL: v_urem_i64_24bit: 3061; CGP: ; %bb.0: 3062; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3063; CGP-NEXT: s_mov_b32 s4, 0xffffff 3064; CGP-NEXT: v_and_b32_e32 v0, s4, v0 3065; CGP-NEXT: v_and_b32_e32 v1, s4, v2 3066; CGP-NEXT: v_cvt_f32_u32_e32 v2, v0 3067; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1 3068; CGP-NEXT: v_rcp_f32_e32 v4, v3 3069; CGP-NEXT: v_mul_f32_e32 v4, v2, v4 3070; CGP-NEXT: v_trunc_f32_e32 v4, v4 3071; CGP-NEXT: v_mad_f32 v2, -v4, v3, v2 3072; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 3073; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, v3 3074; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] 3075; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2 3076; CGP-NEXT: v_mul_lo_u32 v1, v2, v1 3077; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 3078; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0 3079; CGP-NEXT: v_mov_b32_e32 v1, 0 3080; CGP-NEXT: s_setpc_b64 s[30:31] 3081 %num.mask = and i64 %num, 16777215 3082 %den.mask = and i64 %den, 16777215 3083 %result = urem i64 %num.mask, %den.mask 3084 ret i64 %result 3085} 3086 3087define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { 3088; GISEL-LABEL: v_urem_v2i64_24bit: 3089; GISEL: ; %bb.0: 3090; 
GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3091; GISEL-NEXT: s_mov_b32 s6, 0xffffff 3092; GISEL-NEXT: v_cvt_f32_u32_e32 v1, 0 3093; GISEL-NEXT: v_and_b32_e32 v3, s6, v4 3094; GISEL-NEXT: v_and_b32_e32 v4, s6, v6 3095; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 3096; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v3 3097; GISEL-NEXT: v_subb_u32_e64 v7, s[4:5], 0, 0, vcc 3098; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4 3099; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v4 3100; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc 3101; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v1 3102; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v1 3103; GISEL-NEXT: v_rcp_iflag_f32_e32 v1, v5 3104; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v8 3105; GISEL-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 3106; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 3107; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v1 3108; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v5 3109; GISEL-NEXT: v_trunc_f32_e32 v8, v8 3110; GISEL-NEXT: v_trunc_f32_e32 v11, v11 3111; GISEL-NEXT: v_mac_f32_e32 v1, 0xcf800000, v8 3112; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 3113; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v11 3114; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 3115; GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1 3116; GISEL-NEXT: v_mul_lo_u32 v12, v6, v8 3117; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 3118; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11 3119; GISEL-NEXT: v_mul_lo_u32 v14, v6, v1 3120; GISEL-NEXT: v_mul_lo_u32 v15, v7, v1 3121; GISEL-NEXT: v_mul_hi_u32 v16, v6, v1 3122; GISEL-NEXT: v_mul_lo_u32 v17, v9, v5 3123; GISEL-NEXT: v_mul_lo_u32 v18, v10, v5 3124; GISEL-NEXT: v_mul_hi_u32 v19, v9, v5 3125; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 3126; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13 3127; GISEL-NEXT: v_mul_lo_u32 v15, v11, v17 3128; GISEL-NEXT: v_mul_hi_u32 v18, v5, v17 3129; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19 3130; GISEL-NEXT: v_mul_lo_u32 v19, v5, v13 3131; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19 3132; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, 
vcc 3133; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v18 3134; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14 3135; GISEL-NEXT: v_mul_hi_u32 v18, v1, v14 3136; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14 3137; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17 3138; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v16 3139; GISEL-NEXT: v_mul_lo_u32 v16, v1, v12 3140; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v16 3141; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] 3142; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 3143; GISEL-NEXT: v_mul_lo_u32 v15, v8, v12 3144; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] 3145; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18 3146; GISEL-NEXT: v_mul_hi_u32 v18, v1, v12 3147; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 3148; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] 3149; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v18 3150; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] 3151; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 3152; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 3153; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18 3154; GISEL-NEXT: v_mul_lo_u32 v19, v11, v13 3155; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17 3156; GISEL-NEXT: v_mul_hi_u32 v19, v5, v13 3157; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc 3158; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 3159; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc 3160; GISEL-NEXT: v_add_i32_e32 v19, vcc, v20, v19 3161; GISEL-NEXT: v_and_b32_e32 v0, s6, v0 3162; GISEL-NEXT: v_and_b32_e32 v2, s6, v2 3163; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 3164; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13 3165; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 3166; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 3167; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 3168; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc 3169; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 3170; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18 3171; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15 3172; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16 3173; 
GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14 3174; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v8, v12, vcc 3175; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v12 3176; GISEL-NEXT: v_mul_lo_u32 v12, v6, v1 3177; GISEL-NEXT: v_mul_lo_u32 v7, v7, v1 3178; GISEL-NEXT: v_mul_hi_u32 v15, v6, v1 3179; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v17 3180; GISEL-NEXT: v_addc_u32_e64 v16, s[6:7], v11, v13, s[4:5] 3181; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v13 3182; GISEL-NEXT: v_mul_lo_u32 v13, v9, v5 3183; GISEL-NEXT: v_mul_lo_u32 v10, v10, v5 3184; GISEL-NEXT: v_mul_hi_u32 v17, v9, v5 3185; GISEL-NEXT: v_mul_lo_u32 v6, v6, v14 3186; GISEL-NEXT: v_mul_lo_u32 v18, v14, v12 3187; GISEL-NEXT: v_mul_hi_u32 v19, v1, v12 3188; GISEL-NEXT: v_mul_hi_u32 v12, v14, v12 3189; GISEL-NEXT: v_mul_lo_u32 v9, v9, v16 3190; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v7, v6 3191; GISEL-NEXT: v_mul_lo_u32 v7, v16, v13 3192; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v10, v9 3193; GISEL-NEXT: v_mul_hi_u32 v10, v5, v13 3194; GISEL-NEXT: v_mul_hi_u32 v13, v16, v13 3195; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v15 3196; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v17 3197; GISEL-NEXT: v_mul_lo_u32 v15, v1, v6 3198; GISEL-NEXT: v_mul_lo_u32 v17, v5, v9 3199; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v17 3200; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] 3201; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v10 3202; GISEL-NEXT: v_mul_lo_u32 v7, v14, v6 3203; GISEL-NEXT: v_mul_hi_u32 v10, v1, v6 3204; GISEL-NEXT: v_mul_hi_u32 v6, v14, v6 3205; GISEL-NEXT: v_mul_lo_u32 v14, v16, v9 3206; GISEL-NEXT: v_mul_hi_u32 v16, v16, v9 3207; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9 3208; GISEL-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15 3209; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] 3210; GISEL-NEXT: v_add_i32_e64 v7, s[8:9], v7, v12 3211; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9] 3212; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v14, v13 3213; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] 3214; GISEL-NEXT: v_add_i32_e64 
v15, s[8:9], v15, v19 3215; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] 3216; GISEL-NEXT: v_add_i32_e64 v7, s[8:9], v7, v10 3217; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[8:9] 3218; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] 3219; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v13, v9 3220; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7] 3221; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v18, v15 3222; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v12, v10 3223; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v17, v19 3224; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v14, v13 3225; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v15 3226; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] 3227; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12 3228; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[6:7] 3229; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v14 3230; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v13, v12 3231; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v10 3232; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v16, v12 3233; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v8, v6, vcc 3234; GISEL-NEXT: v_addc_u32_e64 v8, vcc, v11, v10, s[4:5] 3235; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v7 3236; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc 3237; GISEL-NEXT: v_mul_lo_u32 v7, 0, v1 3238; GISEL-NEXT: v_mul_hi_u32 v10, v0, v1 3239; GISEL-NEXT: v_mul_hi_u32 v1, 0, v1 3240; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 3241; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc 3242; GISEL-NEXT: v_mul_lo_u32 v9, 0, v5 3243; GISEL-NEXT: v_mul_hi_u32 v11, v2, v5 3244; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5 3245; GISEL-NEXT: v_mul_lo_u32 v12, v0, v6 3246; GISEL-NEXT: v_mul_lo_u32 v13, 0, v6 3247; GISEL-NEXT: v_mul_hi_u32 v14, v0, v6 3248; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6 3249; GISEL-NEXT: v_mul_lo_u32 v15, v2, v8 3250; GISEL-NEXT: v_mul_lo_u32 v16, 0, v8 3251; GISEL-NEXT: v_mul_hi_u32 v17, v2, v8 3252; GISEL-NEXT: v_mul_hi_u32 v8, 0, v8 3253; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v12 3254; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc 3255; 
GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1 3256; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 3257; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15 3258; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc 3259; GISEL-NEXT: v_add_i32_e32 v5, vcc, v16, v5 3260; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc 3261; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 3262; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 3263; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v14 3264; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 3265; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 3266; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 3267; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v17 3268; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 3269; GISEL-NEXT: v_add_i32_e32 v7, vcc, v12, v7 3270; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 3271; GISEL-NEXT: v_add_i32_e32 v9, vcc, v15, v9 3272; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 3273; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v7 3274; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 3275; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9 3276; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 3277; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 3278; GISEL-NEXT: v_mul_lo_u32 v10, v3, v1 3279; GISEL-NEXT: v_mul_lo_u32 v12, 0, v1 3280; GISEL-NEXT: v_mul_hi_u32 v1, v3, v1 3281; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 3282; GISEL-NEXT: v_mul_lo_u32 v11, v4, v5 3283; GISEL-NEXT: v_mul_lo_u32 v13, 0, v5 3284; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 3285; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 3286; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9 3287; GISEL-NEXT: v_mul_lo_u32 v6, v3, v6 3288; GISEL-NEXT: v_mul_lo_u32 v7, v4, v7 3289; GISEL-NEXT: v_add_i32_e32 v6, vcc, v12, v6 3290; GISEL-NEXT: v_add_i32_e32 v7, vcc, v13, v7 3291; GISEL-NEXT: v_add_i32_e32 v1, vcc, v6, v1 3292; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5 3293; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 3294; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], 0, v1, vcc 3295; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], 0, v1 3296; GISEL-NEXT: 
v_cmp_ge_u32_e64 s[4:5], v0, v3 3297; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] 3298; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v11 3299; GISEL-NEXT: v_subb_u32_e64 v8, s[6:7], 0, v5, s[4:5] 3300; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], 0, v5 3301; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v4 3302; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7] 3303; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v6 3304; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[6:7] 3305; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 3306; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v8 3307; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc 3308; GISEL-NEXT: v_subbrev_u32_e64 v5, vcc, 0, v5, s[4:5] 3309; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 3310; GISEL-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc 3311; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v0, v3 3312; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc 3313; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v3 3314; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc 3315; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 3316; GISEL-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc 3317; GISEL-NEXT: v_sub_i32_e32 v11, vcc, v2, v4 3318; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc 3319; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v4 3320; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc 3321; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 3322; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc 3323; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v10, v3 3324; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v1, vcc 3325; GISEL-NEXT: v_cmp_le_u32_e32 vcc, 0, v5 3326; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc 3327; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v11, v4 3328; GISEL-NEXT: v_subbrev_u32_e32 v17, vcc, 0, v5, vcc 3329; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 3330; GISEL-NEXT: v_cndmask_b32_e32 v12, v14, v12, vcc 3331; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 3332; GISEL-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc 3333; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 3334; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc 3335; 
GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 3336; GISEL-NEXT: v_cndmask_b32_e64 v4, v11, v4, s[4:5] 3337; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 3338; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 3339; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 3340; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v17, s[4:5] 3341; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9 3342; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] 3343; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc 3344; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5] 3345; GISEL-NEXT: s_setpc_b64 s[30:31] 3346; 3347; CGP-LABEL: v_urem_v2i64_24bit: 3348; CGP: ; %bb.0: 3349; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3350; CGP-NEXT: s_mov_b32 s6, 0xffffff 3351; CGP-NEXT: v_mov_b32_e32 v1, 0 3352; CGP-NEXT: v_and_b32_e32 v0, s6, v0 3353; CGP-NEXT: v_and_b32_e32 v2, s6, v2 3354; CGP-NEXT: v_and_b32_e32 v3, s6, v4 3355; CGP-NEXT: v_and_b32_e32 v4, s6, v6 3356; CGP-NEXT: v_cvt_f32_u32_e32 v5, v0 3357; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 3358; CGP-NEXT: v_cvt_f32_u32_e32 v7, v2 3359; CGP-NEXT: v_cvt_f32_u32_e32 v8, v4 3360; CGP-NEXT: v_rcp_f32_e32 v9, v6 3361; CGP-NEXT: v_rcp_f32_e32 v10, v8 3362; CGP-NEXT: v_mul_f32_e32 v9, v5, v9 3363; CGP-NEXT: v_mul_f32_e32 v10, v7, v10 3364; CGP-NEXT: v_trunc_f32_e32 v9, v9 3365; CGP-NEXT: v_trunc_f32_e32 v10, v10 3366; CGP-NEXT: v_mad_f32 v5, -v9, v6, v5 3367; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 3368; CGP-NEXT: v_mad_f32 v7, -v10, v8, v7 3369; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10 3370; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v5|, v6 3371; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5] 3372; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v7|, v8 3373; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] 3374; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5 3375; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 3376; CGP-NEXT: v_mul_lo_u32 v3, v5, v3 3377; CGP-NEXT: v_mul_lo_u32 v4, v6, v4 3378; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 3379; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 3380; CGP-NEXT: v_and_b32_e32 v0, s6, v0 
3381; CGP-NEXT: v_and_b32_e32 v2, s6, v2 3382; CGP-NEXT: v_mov_b32_e32 v3, v1 3383; CGP-NEXT: s_setpc_b64 s[30:31] 3384 %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215> 3385 %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215> 3386 %result = urem <2 x i64> %num.mask, %den.mask 3387 ret <2 x i64> %result 3388} 3389