; Codegen test for combined signed division and remainder. Each kernel below
; computes both the sdiv and the srem of the same operands %x and %y, storing
; the quotient through %out0 and the remainder through %out1 (both pointers are
; in addrspace(1)).
;
; llc is invoked three times with -global-isel and
; -mtriple=amdgcn-amd-amdhsa, once each for -mcpu=tonga, -mcpu=gfx900 and
; -mcpu=gfx1010. The output of each invocation is matched against its own
; FileCheck prefix (GFX8, GFX9 and GFX10 respectively). Every invocation also
; passes -amdgpu-codegenprepare-disable-idiv-expansion=1 and
; -amdgpu-bypass-slow-div=0.
; TODO(review): presumably these two options keep the division from being
; rewritten before instruction selection, so the GlobalISel expansion is what
; gets tested -- confirm against the option descriptions.
;
; The expected-output lines are autogenerated by
; utils/update_llc_test_checks.py (see the note on the first line of the
; file); regenerate them with that script instead of editing them by hand.
;
; Kernels whose checks are complete in this part of the file:
;   @sdivrem_i32 - i32 operands
;   @sdivrem_i64 - i64 operands
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=GFX8 %s 3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s 4; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s 5 6define amdgpu_kernel void @sdivrem_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) { 7; GFX8-LABEL: sdivrem_i32: 8; GFX8: ; %bb.0: 9; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 10; GFX8-NEXT: s_waitcnt lgkmcnt(0) 11; GFX8-NEXT: s_ashr_i32 s6, s1, 31 12; GFX8-NEXT: s_add_i32 s1, s1, s6 13; GFX8-NEXT: s_xor_b32 s7, s1, s6 14; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s7 15; GFX8-NEXT: s_sub_i32 s1, 0, s7 16; GFX8-NEXT: s_ashr_i32 s8, s0, 31 17; GFX8-NEXT: s_add_i32 s0, s0, s8 18; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 19; GFX8-NEXT: s_xor_b32 s9, s0, s8 20; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 21; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 22; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0 23; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 24; GFX8-NEXT: s_xor_b32 s4, s8, s6 25; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 26; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 27; GFX8-NEXT: v_mul_hi_u32 v2, s9, v0 28; GFX8-NEXT: s_waitcnt lgkmcnt(0) 29; GFX8-NEXT: v_mov_b32_e32 v0, s0 30; GFX8-NEXT: v_mov_b32_e32 v1, s1 31; GFX8-NEXT: v_mul_lo_u32 v3, v2, s7 32; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 33; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s9, v3 34; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 35; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 36; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3 37; GFX8-NEXT: 
v_cndmask_b32_e32 v3, v3, v4, vcc 38; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 39; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 40; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 41; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3 42; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2 43; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 44; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s4, v2 45; GFX8-NEXT: v_xor_b32_e32 v3, s8, v3 46; GFX8-NEXT: flat_store_dword v[0:1], v2 47; GFX8-NEXT: v_mov_b32_e32 v0, s2 48; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s8, v3 49; GFX8-NEXT: v_mov_b32_e32 v1, s3 50; GFX8-NEXT: flat_store_dword v[0:1], v3 51; GFX8-NEXT: s_endpgm 52; 53; GFX9-LABEL: sdivrem_i32: 54; GFX9: ; %bb.0: 55; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 56; GFX9-NEXT: v_mov_b32_e32 v2, 0 57; GFX9-NEXT: s_waitcnt lgkmcnt(0) 58; GFX9-NEXT: s_ashr_i32 s6, s1, 31 59; GFX9-NEXT: s_add_i32 s1, s1, s6 60; GFX9-NEXT: s_xor_b32 s7, s1, s6 61; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7 62; GFX9-NEXT: s_sub_i32 s1, 0, s7 63; GFX9-NEXT: s_ashr_i32 s8, s0, 31 64; GFX9-NEXT: s_add_i32 s0, s0, s8 65; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 66; GFX9-NEXT: s_xor_b32 s9, s0, s8 67; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 68; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 69; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0 70; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 71; GFX9-NEXT: s_xor_b32 s4, s8, s6 72; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 73; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 74; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 75; GFX9-NEXT: v_mul_lo_u32 v1, v0, s7 76; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 77; GFX9-NEXT: v_sub_u32_e32 v1, s9, v1 78; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 79; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 80; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 81; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 82; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 83; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 84; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 85; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 86; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 87; 
GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 88; GFX9-NEXT: v_xor_b32_e32 v1, s8, v1 89; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0 90; GFX9-NEXT: v_subrev_u32_e32 v1, s8, v1 91; GFX9-NEXT: s_waitcnt lgkmcnt(0) 92; GFX9-NEXT: global_store_dword v2, v0, s[0:1] 93; GFX9-NEXT: global_store_dword v2, v1, s[2:3] 94; GFX9-NEXT: s_endpgm 95; 96; GFX10-LABEL: sdivrem_i32: 97; GFX10: ; %bb.0: 98; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 99; GFX10-NEXT: s_waitcnt lgkmcnt(0) 100; GFX10-NEXT: s_ashr_i32 s6, s1, 31 101; GFX10-NEXT: s_ashr_i32 s8, s0, 31 102; GFX10-NEXT: s_add_i32 s1, s1, s6 103; GFX10-NEXT: s_add_i32 s0, s0, s8 104; GFX10-NEXT: s_xor_b32 s7, s1, s6 105; GFX10-NEXT: s_xor_b32 s0, s0, s8 106; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s7 107; GFX10-NEXT: s_sub_i32 s1, 0, s7 108; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 109; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 110; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 111; GFX10-NEXT: v_mul_lo_u32 v1, s1, v0 112; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 113; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 114; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 115; GFX10-NEXT: v_mul_lo_u32 v1, v0, s7 116; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 117; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 118; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 119; GFX10-NEXT: s_xor_b32 s4, s8, s6 120; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 121; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 122; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 123; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 124; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 125; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 126; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 127; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 128; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 129; GFX10-NEXT: v_mov_b32_e32 v2, 0 130; GFX10-NEXT: v_xor_b32_e32 v0, s4, v0 131; GFX10-NEXT: v_xor_b32_e32 v1, s8, v1 132; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s4, v0 133; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s8, v1 134; GFX10-NEXT: s_waitcnt 
lgkmcnt(0) 135; GFX10-NEXT: global_store_dword v2, v0, s[0:1] 136; GFX10-NEXT: global_store_dword v2, v1, s[2:3] 137; GFX10-NEXT: s_endpgm 138 %div = sdiv i32 %x, %y 139 store i32 %div, i32 addrspace(1)* %out0 140 %rem = srem i32 %x, %y 141 store i32 %rem, i32 addrspace(1)* %out1 142 ret void 143} 144 145define amdgpu_kernel void @sdivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1)* %out1, i64 %x, i64 %y) { 146; GFX8-LABEL: sdivrem_i64: 147; GFX8: ; %bb.0: 148; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 149; GFX8-NEXT: s_waitcnt lgkmcnt(0) 150; GFX8-NEXT: s_ashr_i32 s12, s11, 31 151; GFX8-NEXT: s_ashr_i32 s2, s9, 31 152; GFX8-NEXT: s_add_u32 s0, s8, s2 153; GFX8-NEXT: s_cselect_b32 s1, 1, 0 154; GFX8-NEXT: s_and_b32 s1, s1, 1 155; GFX8-NEXT: s_cmp_lg_u32 s1, 0 156; GFX8-NEXT: s_addc_u32 s1, s9, s2 157; GFX8-NEXT: s_add_u32 s8, s10, s12 158; GFX8-NEXT: s_cselect_b32 s3, 1, 0 159; GFX8-NEXT: s_and_b32 s3, s3, 1 160; GFX8-NEXT: s_cmp_lg_u32 s3, 0 161; GFX8-NEXT: s_mov_b32 s13, s12 162; GFX8-NEXT: s_addc_u32 s9, s11, s12 163; GFX8-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] 164; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s9 165; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s8 166; GFX8-NEXT: s_mov_b32 s3, s2 167; GFX8-NEXT: s_xor_b64 s[10:11], s[0:1], s[2:3] 168; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 169; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 170; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 171; GFX8-NEXT: s_sub_u32 s14, 0, s8 172; GFX8-NEXT: s_cselect_b32 s0, 1, 0 173; GFX8-NEXT: s_and_b32 s0, s0, 1 174; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 175; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 176; GFX8-NEXT: v_trunc_f32_e32 v1, v1 177; GFX8-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 178; GFX8-NEXT: v_add_f32_e32 v0, v2, v0 179; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 180; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 181; GFX8-NEXT: s_cmp_lg_u32 s0, 0 182; GFX8-NEXT: s_subb_u32 s15, 0, s9 183; GFX8-NEXT: v_mul_lo_u32 v3, s15, v0 184; GFX8-NEXT: v_mul_lo_u32 v2, s14, v1 185; GFX8-NEXT: v_mul_hi_u32 v5, 
s14, v0 186; GFX8-NEXT: v_mul_lo_u32 v4, s14, v0 187; GFX8-NEXT: v_mov_b32_e32 v6, s9 188; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 189; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 190; GFX8-NEXT: v_mul_lo_u32 v3, v1, v4 191; GFX8-NEXT: v_mul_lo_u32 v5, v0, v2 192; GFX8-NEXT: v_mul_hi_u32 v7, v0, v4 193; GFX8-NEXT: v_mul_hi_u32 v4, v1, v4 194; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 195; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 196; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 197; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 198; GFX8-NEXT: v_mul_lo_u32 v7, v1, v2 199; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 200; GFX8-NEXT: v_mul_hi_u32 v5, v0, v2 201; GFX8-NEXT: v_mul_hi_u32 v2, v1, v2 202; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4 203; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 204; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5 205; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 206; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 207; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 208; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 209; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4 210; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 211; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 212; GFX8-NEXT: v_addc_u32_e64 v3, s[0:1], v1, v2, vcc 213; GFX8-NEXT: v_mul_lo_u32 v4, s15, v0 214; GFX8-NEXT: v_mul_lo_u32 v5, s14, v3 215; GFX8-NEXT: v_mul_hi_u32 v8, s14, v0 216; GFX8-NEXT: v_mul_lo_u32 v7, s14, v0 217; GFX8-NEXT: v_add_u32_e64 v1, s[0:1], v1, v2 218; GFX8-NEXT: v_add_u32_e64 v4, s[0:1], v4, v5 219; GFX8-NEXT: v_add_u32_e64 v4, s[0:1], v4, v8 220; GFX8-NEXT: v_mul_lo_u32 v5, v3, v7 221; GFX8-NEXT: v_mul_lo_u32 v8, v0, v4 222; GFX8-NEXT: v_mul_hi_u32 v2, v0, v7 223; GFX8-NEXT: v_mul_hi_u32 v7, v3, v7 224; GFX8-NEXT: v_add_u32_e64 v5, s[0:1], v5, v8 225; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[0:1] 226; GFX8-NEXT: v_add_u32_e64 v2, s[0:1], v5, v2 227; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] 228; GFX8-NEXT: v_mul_lo_u32 v5, v3, v4 229; GFX8-NEXT: v_add_u32_e64 v2, s[0:1], v8, v2 230; GFX8-NEXT: 
v_mul_hi_u32 v8, v0, v4 231; GFX8-NEXT: v_mul_hi_u32 v3, v3, v4 232; GFX8-NEXT: v_add_u32_e64 v5, s[0:1], v5, v7 233; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[0:1] 234; GFX8-NEXT: v_add_u32_e64 v5, s[0:1], v5, v8 235; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[0:1] 236; GFX8-NEXT: v_add_u32_e64 v7, s[0:1], v7, v8 237; GFX8-NEXT: v_add_u32_e64 v2, s[0:1], v5, v2 238; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[0:1] 239; GFX8-NEXT: v_add_u32_e64 v4, s[0:1], v7, v5 240; GFX8-NEXT: v_add_u32_e64 v3, s[0:1], v3, v4 241; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc 242; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 243; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 244; GFX8-NEXT: v_mul_lo_u32 v2, s11, v0 245; GFX8-NEXT: v_mul_lo_u32 v3, s10, v1 246; GFX8-NEXT: v_mul_hi_u32 v5, s10, v0 247; GFX8-NEXT: v_mul_hi_u32 v0, s11, v0 248; GFX8-NEXT: v_mov_b32_e32 v4, s11 249; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 250; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 251; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 252; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 253; GFX8-NEXT: v_mul_lo_u32 v5, s11, v1 254; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 255; GFX8-NEXT: v_mul_hi_u32 v3, s10, v1 256; GFX8-NEXT: v_mul_hi_u32 v1, s11, v1 257; GFX8-NEXT: v_add_u32_e32 v0, vcc, v5, v0 258; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 259; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 260; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 261; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 262; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 263; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 264; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 265; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 266; GFX8-NEXT: v_mul_lo_u32 v2, s9, v0 267; GFX8-NEXT: v_mul_lo_u32 v3, s8, v1 268; GFX8-NEXT: v_mul_hi_u32 v7, s8, v0 269; GFX8-NEXT: v_mul_lo_u32 v5, s8, v0 270; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 271; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 272; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s10, v5 273; GFX8-NEXT: v_subb_u32_e64 v4, s[0:1], v4, v2, vcc 274; 
GFX8-NEXT: v_sub_u32_e64 v2, s[0:1], s11, v2 275; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v4 276; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] 277; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v3 278; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] 279; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v4 280; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc 281; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[0:1] 282; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s8, v3 283; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v2, vcc 284; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v0 285; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v1, s[0:1] 286; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v8 287; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc 288; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] 289; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7 290; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s8, v7 291; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] 292; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v8 293; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] 294; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v9 295; GFX8-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc 296; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v10, s[0:1] 297; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 298; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 299; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc 300; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc 301; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc 302; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v6, vcc 303; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1] 304; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[0:1] 305; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v10, s[0:1] 306; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[0:1] 307; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[12:13] 308; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 309; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1 310; GFX8-NEXT: v_mov_b32_e32 v4, s1 311; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 312; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc 313; GFX8-NEXT: v_xor_b32_e32 v3, s2, v3 
314; GFX8-NEXT: v_xor_b32_e32 v4, s2, v2 315; GFX8-NEXT: v_mov_b32_e32 v5, s2 316; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s2, v3 317; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v4, v5, vcc 318; GFX8-NEXT: v_mov_b32_e32 v4, s4 319; GFX8-NEXT: v_mov_b32_e32 v5, s5 320; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1] 321; GFX8-NEXT: v_mov_b32_e32 v0, s6 322; GFX8-NEXT: v_mov_b32_e32 v1, s7 323; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 324; GFX8-NEXT: s_endpgm 325; 326; GFX9-LABEL: sdivrem_i64: 327; GFX9: ; %bb.0: 328; GFX9-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 329; GFX9-NEXT: s_waitcnt lgkmcnt(0) 330; GFX9-NEXT: s_ashr_i32 s12, s11, 31 331; GFX9-NEXT: s_ashr_i32 s2, s9, 31 332; GFX9-NEXT: s_add_u32 s0, s8, s2 333; GFX9-NEXT: s_cselect_b32 s1, 1, 0 334; GFX9-NEXT: s_and_b32 s1, s1, 1 335; GFX9-NEXT: s_cmp_lg_u32 s1, 0 336; GFX9-NEXT: s_addc_u32 s1, s9, s2 337; GFX9-NEXT: s_add_u32 s8, s10, s12 338; GFX9-NEXT: s_cselect_b32 s3, 1, 0 339; GFX9-NEXT: s_and_b32 s3, s3, 1 340; GFX9-NEXT: s_cmp_lg_u32 s3, 0 341; GFX9-NEXT: s_mov_b32 s13, s12 342; GFX9-NEXT: s_addc_u32 s9, s11, s12 343; GFX9-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] 344; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s9 345; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s8 346; GFX9-NEXT: s_mov_b32 s3, s2 347; GFX9-NEXT: s_xor_b64 s[10:11], s[0:1], s[2:3] 348; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 349; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 350; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 351; GFX9-NEXT: s_sub_u32 s14, 0, s8 352; GFX9-NEXT: s_cselect_b32 s0, 1, 0 353; GFX9-NEXT: s_and_b32 s0, s0, 1 354; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 355; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 356; GFX9-NEXT: v_trunc_f32_e32 v1, v1 357; GFX9-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 358; GFX9-NEXT: v_add_f32_e32 v0, v2, v0 359; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 360; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 361; GFX9-NEXT: s_cmp_lg_u32 s0, 0 362; GFX9-NEXT: s_subb_u32 s15, 0, s9 363; GFX9-NEXT: v_mul_lo_u32 v3, s15, v0 364; GFX9-NEXT: v_mul_lo_u32 
v2, s14, v1 365; GFX9-NEXT: v_mul_hi_u32 v4, s14, v0 366; GFX9-NEXT: v_mul_lo_u32 v5, s14, v0 367; GFX9-NEXT: v_mov_b32_e32 v8, s11 368; GFX9-NEXT: v_add3_u32 v2, v3, v2, v4 369; GFX9-NEXT: v_mul_lo_u32 v3, v1, v5 370; GFX9-NEXT: v_mul_lo_u32 v4, v0, v2 371; GFX9-NEXT: v_mul_hi_u32 v6, v0, v5 372; GFX9-NEXT: v_mul_hi_u32 v5, v1, v5 373; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4 374; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 375; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6 376; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 377; GFX9-NEXT: v_mul_lo_u32 v6, v1, v2 378; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 379; GFX9-NEXT: v_mul_hi_u32 v4, v0, v2 380; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 381; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v6, v5 382; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 383; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v5, v4 384; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 385; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v4, v3 386; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 387; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 388; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 389; GFX9-NEXT: v_add3_u32 v2, v5, v4, v2 390; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], v1, v2, vcc 391; GFX9-NEXT: v_mul_lo_u32 v4, s15, v0 392; GFX9-NEXT: v_mul_lo_u32 v5, s14, v3 393; GFX9-NEXT: v_mul_hi_u32 v6, s14, v0 394; GFX9-NEXT: v_mul_lo_u32 v7, s14, v0 395; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 396; GFX9-NEXT: v_add3_u32 v4, v4, v5, v6 397; GFX9-NEXT: v_mul_lo_u32 v5, v3, v7 398; GFX9-NEXT: v_mul_lo_u32 v6, v0, v4 399; GFX9-NEXT: v_mul_hi_u32 v2, v0, v7 400; GFX9-NEXT: v_mul_hi_u32 v7, v3, v7 401; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], v5, v6 402; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] 403; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v5, v2 404; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] 405; GFX9-NEXT: v_mul_lo_u32 v5, v3, v4 406; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 407; GFX9-NEXT: v_mul_hi_u32 v6, v0, v4 408; GFX9-NEXT: v_mul_hi_u32 v3, v3, v4 409; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], v5, v7 
410; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[0:1] 411; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], v5, v6 412; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] 413; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v5, v2 414; GFX9-NEXT: v_add_u32_e32 v6, v7, v6 415; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[0:1] 416; GFX9-NEXT: v_add3_u32 v3, v6, v4, v3 417; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc 418; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 419; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 420; GFX9-NEXT: v_mul_lo_u32 v2, s11, v0 421; GFX9-NEXT: v_mul_lo_u32 v3, s10, v1 422; GFX9-NEXT: v_mul_hi_u32 v5, s10, v0 423; GFX9-NEXT: v_mul_hi_u32 v0, s11, v0 424; GFX9-NEXT: v_mov_b32_e32 v4, s9 425; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 426; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 427; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 428; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 429; GFX9-NEXT: v_mul_lo_u32 v5, s11, v1 430; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 431; GFX9-NEXT: v_mul_hi_u32 v3, s10, v1 432; GFX9-NEXT: v_mul_hi_u32 v1, s11, v1 433; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v5, v0 434; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 435; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 436; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 437; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 438; GFX9-NEXT: v_add_u32_e32 v3, v5, v3 439; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 440; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1 441; GFX9-NEXT: v_mul_lo_u32 v2, s9, v0 442; GFX9-NEXT: v_mul_lo_u32 v3, s8, v1 443; GFX9-NEXT: v_mul_hi_u32 v5, s8, v0 444; GFX9-NEXT: v_mul_lo_u32 v7, s8, v0 445; GFX9-NEXT: v_mov_b32_e32 v6, 0 446; GFX9-NEXT: v_add3_u32 v2, v2, v3, v5 447; GFX9-NEXT: v_sub_co_u32_e32 v3, vcc, s10, v7 448; GFX9-NEXT: v_subb_co_u32_e64 v5, s[0:1], v8, v2, vcc 449; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v5 450; GFX9-NEXT: v_sub_u32_e32 v2, s11, v2 451; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] 452; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v3 453; GFX9-NEXT: 
v_cndmask_b32_e64 v8, 0, -1, s[0:1] 454; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v5 455; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v4, vcc 456; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[0:1] 457; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s8, v3 458; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v2, vcc 459; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v0 460; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v1, s[0:1] 461; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v9 462; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v4, vcc 463; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] 464; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v8 465; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s8, v8 466; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] 467; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v9 468; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[0:1] 469; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v10 470; GFX9-NEXT: v_subbrev_co_u32_e32 v2, vcc, 0, v2, vcc 471; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 472; GFX9-NEXT: v_addc_co_u32_e64 v14, s[0:1], 0, v11, s[0:1] 473; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc 474; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7 475; GFX9-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc 476; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc 477; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc 478; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] 479; GFX9-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[0:1] 480; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v10, s[0:1] 481; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v11, s[0:1] 482; GFX9-NEXT: s_xor_b64 s[0:1], s[2:3], s[12:13] 483; GFX9-NEXT: v_xor_b32_e32 v0, s0, v0 484; GFX9-NEXT: v_xor_b32_e32 v1, s1, v1 485; GFX9-NEXT: v_mov_b32_e32 v4, s1 486; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s0, v0 487; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v4, vcc 488; GFX9-NEXT: v_xor_b32_e32 v3, s2, v3 489; GFX9-NEXT: v_xor_b32_e32 v4, s2, v2 490; GFX9-NEXT: v_mov_b32_e32 v5, s2 491; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s2, v3 492; GFX9-NEXT: v_subb_co_u32_e32 v3, 
vcc, v4, v5, vcc 493; GFX9-NEXT: global_store_dwordx2 v6, v[0:1], s[4:5] 494; GFX9-NEXT: global_store_dwordx2 v6, v[2:3], s[6:7] 495; GFX9-NEXT: s_endpgm 496; 497; GFX10-LABEL: sdivrem_i64: 498; GFX10: ; %bb.0: 499; GFX10-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 500; GFX10-NEXT: s_waitcnt lgkmcnt(0) 501; GFX10-NEXT: s_ashr_i32 s2, s9, 31 502; GFX10-NEXT: s_ashr_i32 s12, s11, 31 503; GFX10-NEXT: s_add_u32 s0, s8, s2 504; GFX10-NEXT: s_cselect_b32 s1, 1, 0 505; GFX10-NEXT: s_mov_b32 s13, s12 506; GFX10-NEXT: s_and_b32 s1, s1, 1 507; GFX10-NEXT: s_cmp_lg_u32 s1, 0 508; GFX10-NEXT: s_addc_u32 s1, s9, s2 509; GFX10-NEXT: s_add_u32 s8, s10, s12 510; GFX10-NEXT: s_cselect_b32 s3, 1, 0 511; GFX10-NEXT: s_and_b32 s3, s3, 1 512; GFX10-NEXT: s_cmp_lg_u32 s3, 0 513; GFX10-NEXT: s_mov_b32 s3, s2 514; GFX10-NEXT: s_addc_u32 s9, s11, s12 515; GFX10-NEXT: s_xor_b64 s[10:11], s[0:1], s[2:3] 516; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13] 517; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s9 518; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s8 519; GFX10-NEXT: s_sub_u32 s1, 0, s8 520; GFX10-NEXT: s_cselect_b32 s0, 1, 0 521; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 522; GFX10-NEXT: s_and_b32 s0, s0, 1 523; GFX10-NEXT: s_cmp_lg_u32 s0, 0 524; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 525; GFX10-NEXT: s_subb_u32 s14, 0, s9 526; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 527; GFX10-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 528; GFX10-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 529; GFX10-NEXT: v_trunc_f32_e32 v1, v1 530; GFX10-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 531; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 532; GFX10-NEXT: v_add_f32_e32 v0, v2, v0 533; GFX10-NEXT: v_mul_lo_u32 v2, s1, v1 534; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 535; GFX10-NEXT: v_mul_lo_u32 v3, s14, v0 536; GFX10-NEXT: v_mul_hi_u32 v4, s1, v0 537; GFX10-NEXT: v_mul_lo_u32 v5, s1, v0 538; GFX10-NEXT: v_add3_u32 v2, v3, v2, v4 539; GFX10-NEXT: v_mul_lo_u32 v3, v1, v5 540; GFX10-NEXT: v_mul_hi_u32 v6, v1, v5 541; GFX10-NEXT: v_mul_hi_u32 v5, v0, v5 
542; GFX10-NEXT: v_mul_lo_u32 v4, v0, v2 543; GFX10-NEXT: v_mul_lo_u32 v7, v1, v2 544; GFX10-NEXT: v_mul_hi_u32 v8, v0, v2 545; GFX10-NEXT: v_mul_hi_u32 v2, v1, v2 546; GFX10-NEXT: v_add_co_u32 v3, s0, v3, v4 547; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 548; GFX10-NEXT: v_add_co_u32 v6, s0, v7, v6 549; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 550; GFX10-NEXT: v_add_co_u32 v3, s0, v3, v5 551; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 552; GFX10-NEXT: v_add_co_u32 v5, s0, v6, v8 553; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 554; GFX10-NEXT: v_add_nc_u32_e32 v3, v4, v3 555; GFX10-NEXT: v_add_nc_u32_e32 v4, v7, v6 556; GFX10-NEXT: v_add_co_u32 v3, s0, v5, v3 557; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 558; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3 559; GFX10-NEXT: v_add3_u32 v2, v4, v5, v2 560; GFX10-NEXT: v_mul_lo_u32 v4, s14, v0 561; GFX10-NEXT: v_mul_hi_u32 v5, s1, v0 562; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, v1, v2, vcc_lo 563; GFX10-NEXT: v_mul_lo_u32 v7, s1, v0 564; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 565; GFX10-NEXT: v_mul_lo_u32 v6, s1, v3 566; GFX10-NEXT: v_mul_hi_u32 v8, v3, v7 567; GFX10-NEXT: v_add3_u32 v4, v4, v6, v5 568; GFX10-NEXT: v_mul_lo_u32 v5, v3, v7 569; GFX10-NEXT: v_mul_hi_u32 v7, v0, v7 570; GFX10-NEXT: v_mul_lo_u32 v6, v0, v4 571; GFX10-NEXT: v_mul_lo_u32 v9, v3, v4 572; GFX10-NEXT: v_mul_hi_u32 v10, v0, v4 573; GFX10-NEXT: v_mul_hi_u32 v3, v3, v4 574; GFX10-NEXT: v_add_co_u32 v5, s0, v5, v6 575; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 576; GFX10-NEXT: v_add_co_u32 v8, s0, v9, v8 577; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s0 578; GFX10-NEXT: v_add_co_u32 v5, s0, v5, v7 579; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 580; GFX10-NEXT: v_add_co_u32 v7, s0, v8, v10 581; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s0 582; GFX10-NEXT: v_add_nc_u32_e32 v5, v6, v5 583; GFX10-NEXT: v_add_nc_u32_e32 v4, v9, v8 584; GFX10-NEXT: v_add_co_u32 v5, s0, v7, v5 585; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 586; GFX10-NEXT: v_add3_u32 v2, 
v4, v6, v3 587; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v2, vcc_lo 588; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v5 589; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 590; GFX10-NEXT: v_mul_lo_u32 v2, s11, v0 591; GFX10-NEXT: v_mul_hi_u32 v4, s11, v0 592; GFX10-NEXT: v_mul_hi_u32 v0, s10, v0 593; GFX10-NEXT: v_mul_lo_u32 v3, s10, v1 594; GFX10-NEXT: v_mul_lo_u32 v5, s11, v1 595; GFX10-NEXT: v_mul_hi_u32 v6, s10, v1 596; GFX10-NEXT: v_mul_hi_u32 v1, s11, v1 597; GFX10-NEXT: v_add_co_u32 v2, s0, v2, v3 598; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 599; GFX10-NEXT: v_add_co_u32 v4, s0, v5, v4 600; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 601; GFX10-NEXT: v_add_co_u32 v0, s0, v2, v0 602; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 603; GFX10-NEXT: v_add_co_u32 v2, s0, v4, v6 604; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s0 605; GFX10-NEXT: v_add_nc_u32_e32 v0, v3, v0 606; GFX10-NEXT: v_add_nc_u32_e32 v3, v5, v4 607; GFX10-NEXT: v_add_co_u32 v0, s0, v2, v0 608; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 609; GFX10-NEXT: v_mul_lo_u32 v5, s8, v0 610; GFX10-NEXT: v_add3_u32 v1, v3, v2, v1 611; GFX10-NEXT: v_mul_lo_u32 v2, s9, v0 612; GFX10-NEXT: v_mul_hi_u32 v3, s8, v0 613; GFX10-NEXT: v_mul_lo_u32 v4, s8, v1 614; GFX10-NEXT: v_add3_u32 v2, v2, v4, v3 615; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v0, 1 616; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v1, vcc_lo 617; GFX10-NEXT: v_sub_co_u32 v5, vcc_lo, s10, v5 618; GFX10-NEXT: v_sub_nc_u32_e32 v6, s11, v2 619; GFX10-NEXT: v_sub_co_ci_u32_e64 v2, s0, s11, v2, vcc_lo 620; GFX10-NEXT: v_subrev_co_ci_u32_e32 v6, vcc_lo, s9, v6, vcc_lo 621; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s8, v5 622; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc_lo 623; GFX10-NEXT: v_sub_co_u32 v8, vcc_lo, v5, s8 624; GFX10-NEXT: v_subrev_co_ci_u32_e64 v9, s0, 0, v6, vcc_lo 625; GFX10-NEXT: v_cmp_le_u32_e64 s0, s9, v2 626; GFX10-NEXT: v_subrev_co_ci_u32_e32 v6, vcc_lo, s9, v6, vcc_lo 627; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, -1, s0 
628; GFX10-NEXT: v_cmp_le_u32_e64 s0, s8, v8 629; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, s0 630; GFX10-NEXT: v_cmp_le_u32_e64 s0, s9, v9 631; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, -1, s0 632; GFX10-NEXT: v_add_co_u32 v13, s0, v3, 1 633; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s0, 0, v4, s0 634; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s9, v9 635; GFX10-NEXT: v_cndmask_b32_e64 v11, v12, v11, s0 636; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s9, v2 637; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v11 638; GFX10-NEXT: v_cndmask_b32_e64 v7, v10, v7, s0 639; GFX10-NEXT: v_sub_co_u32 v10, s0, v8, s8 640; GFX10-NEXT: s_xor_b64 s[8:9], s[2:3], s[12:13] 641; GFX10-NEXT: v_subrev_co_ci_u32_e64 v6, s0, 0, v6, s0 642; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v13, vcc_lo 643; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v7 644; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc_lo 645; GFX10-NEXT: v_cndmask_b32_e32 v7, v8, v10, vcc_lo 646; GFX10-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc_lo 647; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v3, s0 648; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v4, s0 649; GFX10-NEXT: v_cndmask_b32_e64 v3, v5, v7, s0 650; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0 651; GFX10-NEXT: v_mov_b32_e32 v4, 0 652; GFX10-NEXT: v_xor_b32_e32 v0, s8, v0 653; GFX10-NEXT: v_xor_b32_e32 v1, s9, v1 654; GFX10-NEXT: v_xor_b32_e32 v3, s2, v3 655; GFX10-NEXT: v_xor_b32_e32 v5, s2, v2 656; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, s8 657; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, s9, v1, vcc_lo 658; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v3, s2 659; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s2, v5, vcc_lo 660; GFX10-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] 661; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] 662; GFX10-NEXT: s_endpgm 663 %div = sdiv i64 %x, %y 664 store i64 %div, i64 addrspace(1)* %out0 665 %rem = srem i64 %x, %y 666 store i64 %rem, i64 addrspace(1)* %out1 667 ret void 668} 669 670define amdgpu_kernel void @sdivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32> addrspace(1)* %out1, 
<2 x i32> %x, <2 x i32> %y) { 671; GFX8-LABEL: sdivrem_v2i32: 672; GFX8: ; %bb.0: 673; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x18 674; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10 675; GFX8-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 676; GFX8-NEXT: s_waitcnt lgkmcnt(0) 677; GFX8-NEXT: s_ashr_i32 s8, s0, 31 678; GFX8-NEXT: s_add_i32 s0, s0, s8 679; GFX8-NEXT: s_xor_b32 s9, s0, s8 680; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s9 681; GFX8-NEXT: s_ashr_i32 s11, s1, 31 682; GFX8-NEXT: s_add_i32 s0, s1, s11 683; GFX8-NEXT: s_sub_i32 s1, 0, s9 684; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 685; GFX8-NEXT: s_xor_b32 s12, s0, s11 686; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s12 687; GFX8-NEXT: s_ashr_i32 s10, s2, 31 688; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 689; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 690; GFX8-NEXT: s_add_i32 s0, s2, s10 691; GFX8-NEXT: s_xor_b32 s0, s0, s10 692; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 693; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0 694; GFX8-NEXT: s_ashr_i32 s2, s3, 31 695; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 696; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 697; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 698; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 699; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 700; GFX8-NEXT: v_mul_lo_u32 v2, v0, s9 701; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 702; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s0, v2 703; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s9, v2 704; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 705; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s9, v2 706; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 707; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 708; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s9, v2 709; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 710; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s9, v2 711; GFX8-NEXT: s_sub_i32 s0, 0, s12 712; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 713; GFX8-NEXT: v_mul_lo_u32 v3, s0, v1 714; GFX8-NEXT: s_add_i32 s1, s3, s2 715; GFX8-NEXT: s_xor_b32 s1, s1, s2 716; GFX8-NEXT: s_xor_b32 s0, s10, s8 717; GFX8-NEXT: v_mul_hi_u32 v3, 
v1, v3 718; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 719; GFX8-NEXT: v_xor_b32_e32 v2, s10, v2 720; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 721; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 722; GFX8-NEXT: v_mul_hi_u32 v1, s1, v1 723; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s10, v2 724; GFX8-NEXT: v_mul_lo_u32 v3, v1, s12 725; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 726; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s1, v3 727; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s12, v3 728; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 729; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s12, v3 730; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 731; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 732; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s12, v3 733; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 734; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s12, v3 735; GFX8-NEXT: s_xor_b32 s0, s2, s11 736; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 737; GFX8-NEXT: v_xor_b32_e32 v1, s0, v1 738; GFX8-NEXT: v_mov_b32_e32 v4, s4 739; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, s0, v1 740; GFX8-NEXT: v_mov_b32_e32 v5, s5 741; GFX8-NEXT: v_xor_b32_e32 v3, s2, v3 742; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1] 743; GFX8-NEXT: v_mov_b32_e32 v0, s6 744; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s2, v3 745; GFX8-NEXT: v_mov_b32_e32 v1, s7 746; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 747; GFX8-NEXT: s_endpgm 748; 749; GFX9-LABEL: sdivrem_v2i32: 750; GFX9: ; %bb.0: 751; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x18 752; GFX9-NEXT: s_waitcnt lgkmcnt(0) 753; GFX9-NEXT: s_ashr_i32 s10, s6, 31 754; GFX9-NEXT: s_add_i32 s0, s6, s10 755; GFX9-NEXT: s_xor_b32 s6, s0, s10 756; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s6 757; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 758; GFX9-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x10 759; GFX9-NEXT: s_ashr_i32 s5, s7, 31 760; GFX9-NEXT: s_add_i32 s7, s7, s5 761; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 762; GFX9-NEXT: s_xor_b32 s7, s7, s5 763; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s7 764; GFX9-NEXT: s_sub_i32 s11, 0, s6 765; GFX9-NEXT: 
v_mul_f32_e32 v0, 0x4f7ffffe, v0 766; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 767; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 768; GFX9-NEXT: s_waitcnt lgkmcnt(0) 769; GFX9-NEXT: s_ashr_i32 s4, s8, 31 770; GFX9-NEXT: s_add_i32 s8, s8, s4 771; GFX9-NEXT: v_mul_lo_u32 v2, s11, v0 772; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 773; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 774; GFX9-NEXT: s_xor_b32 s8, s8, s4 775; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 776; GFX9-NEXT: s_sub_i32 s12, 0, s7 777; GFX9-NEXT: s_ashr_i32 s11, s9, 31 778; GFX9-NEXT: s_add_i32 s9, s9, s11 779; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 780; GFX9-NEXT: v_mul_hi_u32 v0, s8, v0 781; GFX9-NEXT: v_mul_lo_u32 v2, s12, v1 782; GFX9-NEXT: s_xor_b32 s9, s9, s11 783; GFX9-NEXT: v_mul_lo_u32 v3, v0, s6 784; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 785; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 786; GFX9-NEXT: v_sub_u32_e32 v3, s8, v3 787; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 788; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 789; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 790; GFX9-NEXT: v_subrev_u32_e32 v4, s6, v3 791; GFX9-NEXT: v_mul_hi_u32 v1, s9, v1 792; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 793; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 794; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 795; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 796; GFX9-NEXT: v_subrev_u32_e32 v4, s6, v3 797; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc 798; GFX9-NEXT: v_mul_lo_u32 v3, v1, s7 799; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 800; GFX9-NEXT: v_xor_b32_e32 v2, s4, v2 801; GFX9-NEXT: s_xor_b32 s6, s4, s10 802; GFX9-NEXT: v_sub_u32_e32 v3, s9, v3 803; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 804; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 805; GFX9-NEXT: v_subrev_u32_e32 v4, s7, v3 806; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 807; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 808; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 809; GFX9-NEXT: v_subrev_u32_e32 v2, s4, v2 810; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 811; GFX9-NEXT: v_subrev_u32_e32 v4, s7, v3 812; GFX9-NEXT: 
s_xor_b32 s4, s11, s5 813; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 814; GFX9-NEXT: v_xor_b32_e32 v0, s6, v0 815; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 816; GFX9-NEXT: v_xor_b32_e32 v3, s11, v3 817; GFX9-NEXT: v_subrev_u32_e32 v0, s6, v0 818; GFX9-NEXT: v_subrev_u32_e32 v1, s4, v1 819; GFX9-NEXT: v_mov_b32_e32 v4, 0 820; GFX9-NEXT: v_subrev_u32_e32 v3, s11, v3 821; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] 822; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] 823; GFX9-NEXT: s_endpgm 824; 825; GFX10-LABEL: sdivrem_v2i32: 826; GFX10: ; %bb.0: 827; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x18 828; GFX10-NEXT: s_waitcnt lgkmcnt(0) 829; GFX10-NEXT: s_ashr_i32 s2, s0, 31 830; GFX10-NEXT: s_ashr_i32 s3, s1, 31 831; GFX10-NEXT: s_add_i32 s0, s0, s2 832; GFX10-NEXT: s_add_i32 s1, s1, s3 833; GFX10-NEXT: s_xor_b32 s8, s0, s2 834; GFX10-NEXT: s_xor_b32 s9, s1, s3 835; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s8 836; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s9 837; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 838; GFX10-NEXT: s_sub_i32 s6, 0, s8 839; GFX10-NEXT: s_sub_i32 s7, 0, s9 840; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 841; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 842; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 843; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 844; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 845; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 846; GFX10-NEXT: s_waitcnt lgkmcnt(0) 847; GFX10-NEXT: s_ashr_i32 s10, s0, 31 848; GFX10-NEXT: s_ashr_i32 s11, s1, 31 849; GFX10-NEXT: s_add_i32 s0, s0, s10 850; GFX10-NEXT: v_mul_lo_u32 v2, s6, v0 851; GFX10-NEXT: v_mul_lo_u32 v3, s7, v1 852; GFX10-NEXT: s_add_i32 s1, s1, s11 853; GFX10-NEXT: s_xor_b32 s0, s0, s10 854; GFX10-NEXT: s_xor_b32 s1, s1, s11 855; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 856; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 857; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 858; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 859; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 860; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 861; 
GFX10-NEXT: v_mul_hi_u32 v1, s1, v1 862; GFX10-NEXT: v_mul_lo_u32 v2, v0, s8 863; GFX10-NEXT: v_mul_lo_u32 v3, v1, s9 864; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 865; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v1 866; GFX10-NEXT: v_sub_nc_u32_e32 v2, s0, v2 867; GFX10-NEXT: v_sub_nc_u32_e32 v3, s1, v3 868; GFX10-NEXT: s_xor_b32 s1, s10, s2 869; GFX10-NEXT: v_cmp_le_u32_e64 s0, s8, v2 870; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s8, v2 871; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s9, v3 872; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s9, v3 873; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 874; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0 875; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 876; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo 877; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 878; GFX10-NEXT: v_cmp_le_u32_e64 s0, s8, v2 879; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v1 880; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s9, v3 881; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s8, v2 882; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s9, v3 883; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 884; GFX10-NEXT: v_mov_b32_e32 v4, 0 885; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 886; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0 887; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo 888; GFX10-NEXT: s_xor_b32 s0, s11, s3 889; GFX10-NEXT: v_xor_b32_e32 v0, s1, v0 890; GFX10-NEXT: v_xor_b32_e32 v1, s0, v1 891; GFX10-NEXT: v_xor_b32_e32 v2, s10, v2 892; GFX10-NEXT: v_xor_b32_e32 v3, s11, v3 893; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s1, v0 894; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s0, v1 895; GFX10-NEXT: v_subrev_nc_u32_e32 v2, s10, v2 896; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s11, v3 897; GFX10-NEXT: s_waitcnt lgkmcnt(0) 898; GFX10-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] 899; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] 900; GFX10-NEXT: s_endpgm 901 %div = sdiv <2 x i32> %x, %y 902 store <2 x i32> %div, <2 x i32> addrspace(1)* %out0 903 %rem = srem <2 x i32> %x, %y 904 store <2 x i32> %rem, <2 x i32> 
addrspace(1)* %out1 905 ret void 906} 907
; Kernel under test: for <4 x i32> operands %x and %y, stores the signed
; quotient (sdiv) to %out0 and the signed remainder (srem) to %out1.
; The GFX8, GFX9 and GFX10 assertions that follow the define line are the
; expected llc output for this kernel; the file header marks them as
; autogenerated by utils/update_llc_test_checks.py, so regenerate them with
; that script rather than editing them by hand.
; Each target's expected code contains four v_rcp_iflag_f32 instructions (one
; per vector lane), and both stored vectors are derived from those same four
; reciprocal sequences.
908define amdgpu_kernel void @sdivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, <4 x i32> %x, <4 x i32> %y) { 909; GFX8-LABEL: sdivrem_v4i32: 910; GFX8: ; %bb.0: 911; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x20 912; GFX8-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 913; GFX8-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x10 914; GFX8-NEXT: v_mov_b32_e32 v3, 0x4f7ffffe 915; GFX8-NEXT: s_waitcnt lgkmcnt(0) 916; GFX8-NEXT: s_ashr_i32 s12, s0, 31 917; GFX8-NEXT: s_add_i32 s0, s0, s12 918; GFX8-NEXT: s_xor_b32 s13, s0, s12 919; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s13 920; GFX8-NEXT: s_ashr_i32 s15, s1, 31 921; GFX8-NEXT: s_add_i32 s0, s1, s15 922; GFX8-NEXT: s_sub_i32 s1, 0, s13 923; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 924; GFX8-NEXT: s_xor_b32 s16, s0, s15 925; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s16 926; GFX8-NEXT: s_ashr_i32 s14, s4, 31 927; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 928; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 929; GFX8-NEXT: s_add_i32 s0, s4, s14 930; GFX8-NEXT: s_xor_b32 s0, s0, s14 931; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 932; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0 933; GFX8-NEXT: s_ashr_i32 s4, s5, 31 934; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 935; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 936; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 937; GFX8-NEXT: v_mul_f32_e32 v1, v2, v3 938; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 939; GFX8-NEXT: v_mul_lo_u32 v2, v0, s13 940; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v0 941; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s0, v2 942; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s13, v2 943; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 944; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s13, v2 945; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 946; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v0 947; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s13, v2 948; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 949; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s13, v2 950; GFX8-NEXT: s_sub_i32 s0, 0, s16 951; GFX8-NEXT: v_cndmask_b32_e32 v2, 
v2, v4, vcc 952; GFX8-NEXT: v_mul_lo_u32 v4, s0, v1 953; GFX8-NEXT: s_add_i32 s1, s5, s4 954; GFX8-NEXT: s_xor_b32 s1, s1, s4 955; GFX8-NEXT: s_xor_b32 s0, s14, s12 956; GFX8-NEXT: v_mul_hi_u32 v4, v1, v4 957; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 958; GFX8-NEXT: v_xor_b32_e32 v2, s14, v2 959; GFX8-NEXT: s_ashr_i32 s5, s2, 31 960; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v4 961; GFX8-NEXT: v_mul_hi_u32 v1, s1, v1 962; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 963; GFX8-NEXT: s_add_i32 s0, s2, s5 964; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s14, v2 965; GFX8-NEXT: v_mul_lo_u32 v5, v1, s16 966; GFX8-NEXT: s_xor_b32 s2, s0, s5 967; GFX8-NEXT: s_ashr_i32 s12, s6, 31 968; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s1, v5 969; GFX8-NEXT: v_add_u32_e32 v5, vcc, 1, v1 970; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s16, v2 971; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 972; GFX8-NEXT: v_cvt_f32_u32_e32 v5, s2 973; GFX8-NEXT: v_subrev_u32_e64 v6, s[0:1], s16, v2 974; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 975; GFX8-NEXT: v_add_u32_e32 v6, vcc, 1, v1 976; GFX8-NEXT: v_rcp_iflag_f32_e32 v5, v5 977; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s16, v2 978; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 979; GFX8-NEXT: v_subrev_u32_e64 v6, s[0:1], s16, v2 980; GFX8-NEXT: v_mul_f32_e32 v5, v5, v3 981; GFX8-NEXT: v_cvt_u32_f32_e32 v5, v5 982; GFX8-NEXT: s_sub_i32 s0, 0, s2 983; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 984; GFX8-NEXT: s_add_i32 s1, s6, s12 985; GFX8-NEXT: v_mul_lo_u32 v6, s0, v5 986; GFX8-NEXT: s_xor_b32 s1, s1, s12 987; GFX8-NEXT: s_xor_b32 s0, s4, s15 988; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2 989; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 990; GFX8-NEXT: v_xor_b32_e32 v1, s0, v1 991; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, s0, v1 992; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v6 993; GFX8-NEXT: v_mul_hi_u32 v6, s1, v5 994; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s4, v2 995; GFX8-NEXT: s_ashr_i32 s4, s3, 31 996; GFX8-NEXT: s_add_i32 s0, s3, s4 997; GFX8-NEXT: v_mul_lo_u32 v7, v6, s2 998; GFX8-NEXT: 
s_xor_b32 s3, s0, s4 999; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s1, v7 1000; GFX8-NEXT: v_add_u32_e32 v7, vcc, 1, v6 1001; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s2, v2 1002; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc 1003; GFX8-NEXT: v_cvt_f32_u32_e32 v7, s3 1004; GFX8-NEXT: v_subrev_u32_e64 v8, s[0:1], s2, v2 1005; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 1006; GFX8-NEXT: v_add_u32_e32 v8, vcc, 1, v6 1007; GFX8-NEXT: v_rcp_iflag_f32_e32 v7, v7 1008; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s2, v2 1009; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc 1010; GFX8-NEXT: v_mul_f32_e32 v3, v7, v3 1011; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 1012; GFX8-NEXT: v_subrev_u32_e64 v7, s[0:1], s2, v2 1013; GFX8-NEXT: s_sub_i32 s0, 0, s3 1014; GFX8-NEXT: v_cndmask_b32_e32 v7, v2, v7, vcc 1015; GFX8-NEXT: v_mul_lo_u32 v2, s0, v3 1016; GFX8-NEXT: s_ashr_i32 s2, s7, 31 1017; GFX8-NEXT: s_add_i32 s1, s7, s2 1018; GFX8-NEXT: s_xor_b32 s1, s1, s2 1019; GFX8-NEXT: v_mul_hi_u32 v2, v3, v2 1020; GFX8-NEXT: s_xor_b32 s0, s12, s5 1021; GFX8-NEXT: v_xor_b32_e32 v6, s0, v6 1022; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 1023; GFX8-NEXT: v_mul_hi_u32 v3, s1, v2 1024; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s0, v6 1025; GFX8-NEXT: v_xor_b32_e32 v6, s12, v7 1026; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s12, v6 1027; GFX8-NEXT: v_mul_lo_u32 v7, v3, s3 1028; GFX8-NEXT: v_add_u32_e32 v8, vcc, 1, v3 1029; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s1, v7 1030; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v7 1031; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc 1032; GFX8-NEXT: v_subrev_u32_e64 v8, s[0:1], s3, v7 1033; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 1034; GFX8-NEXT: v_add_u32_e32 v8, vcc, 1, v3 1035; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v7 1036; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc 1037; GFX8-NEXT: v_subrev_u32_e64 v8, s[0:1], s3, v7 1038; GFX8-NEXT: s_xor_b32 s0, s2, s4 1039; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 1040; GFX8-NEXT: v_xor_b32_e32 v3, s0, v3 1041; GFX8-NEXT: v_mov_b32_e32 v8, s8 1042; GFX8-NEXT: 
v_subrev_u32_e32 v3, vcc, s0, v3 1043; GFX8-NEXT: v_mov_b32_e32 v9, s9 1044; GFX8-NEXT: v_xor_b32_e32 v7, s2, v7 1045; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 1046; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s2, v7 1047; GFX8-NEXT: v_mov_b32_e32 v0, s10 1048; GFX8-NEXT: v_mov_b32_e32 v1, s11 1049; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 1050; GFX8-NEXT: s_endpgm 1051; 1052; GFX9-LABEL: sdivrem_v4i32: 1053; GFX9: ; %bb.0: 1054; GFX9-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x20 1055; GFX9-NEXT: v_mov_b32_e32 v2, 0x4f7ffffe 1056; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1057; GFX9-NEXT: s_ashr_i32 s6, s12, 31 1058; GFX9-NEXT: s_add_i32 s0, s12, s6 1059; GFX9-NEXT: s_xor_b32 s7, s0, s6 1060; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7 1061; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1062; GFX9-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x10 1063; GFX9-NEXT: s_ashr_i32 s4, s13, 31 1064; GFX9-NEXT: s_add_i32 s5, s13, s4 1065; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 1066; GFX9-NEXT: s_sub_i32 s12, 0, s7 1067; GFX9-NEXT: s_xor_b32 s5, s5, s4 1068; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s5 1069; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1070; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 1071; GFX9-NEXT: s_sub_i32 s13, 0, s5 1072; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 1073; GFX9-NEXT: v_mul_lo_u32 v3, s12, v0 1074; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1075; GFX9-NEXT: s_ashr_i32 s12, s8, 31 1076; GFX9-NEXT: s_add_i32 s8, s8, s12 1077; GFX9-NEXT: s_xor_b32 s8, s8, s12 1078; GFX9-NEXT: v_mul_hi_u32 v3, v0, v3 1079; GFX9-NEXT: v_mul_f32_e32 v1, v1, v2 1080; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 1081; GFX9-NEXT: s_xor_b32 s6, s12, s6 1082; GFX9-NEXT: v_add_u32_e32 v0, v0, v3 1083; GFX9-NEXT: v_mul_hi_u32 v0, s8, v0 1084; GFX9-NEXT: v_mul_lo_u32 v3, s13, v1 1085; GFX9-NEXT: s_ashr_i32 s13, s9, 31 1086; GFX9-NEXT: s_add_i32 s9, s9, s13 1087; GFX9-NEXT: v_mul_lo_u32 v4, v0, s7 1088; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 1089; GFX9-NEXT: v_add_u32_e32 v5, 1, v0 1090; GFX9-NEXT: s_xor_b32 s4, s13, s4 1091; GFX9-NEXT: 
v_sub_u32_e32 v4, s8, v4 1092; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v4 1093; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 1094; GFX9-NEXT: v_subrev_u32_e32 v5, s7, v4 1095; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 1096; GFX9-NEXT: v_add_u32_e32 v5, 1, v0 1097; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v4 1098; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 1099; GFX9-NEXT: v_subrev_u32_e32 v5, s7, v4 1100; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 1101; GFX9-NEXT: s_xor_b32 s7, s9, s13 1102; GFX9-NEXT: v_mul_hi_u32 v1, s7, v1 1103; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc 1104; GFX9-NEXT: v_xor_b32_e32 v0, s6, v0 1105; GFX9-NEXT: v_subrev_u32_e32 v0, s6, v0 1106; GFX9-NEXT: v_mul_lo_u32 v5, v1, s5 1107; GFX9-NEXT: v_xor_b32_e32 v3, s12, v3 1108; GFX9-NEXT: s_ashr_i32 s6, s14, 31 1109; GFX9-NEXT: v_subrev_u32_e32 v4, s12, v3 1110; GFX9-NEXT: v_sub_u32_e32 v3, s7, v5 1111; GFX9-NEXT: s_add_i32 s7, s14, s6 1112; GFX9-NEXT: s_xor_b32 s7, s7, s6 1113; GFX9-NEXT: v_cvt_f32_u32_e32 v5, s7 1114; GFX9-NEXT: v_add_u32_e32 v6, 1, v1 1115; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 1116; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 1117; GFX9-NEXT: v_rcp_iflag_f32_e32 v5, v5 1118; GFX9-NEXT: v_subrev_u32_e32 v6, s5, v3 1119; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc 1120; GFX9-NEXT: v_add_u32_e32 v6, 1, v1 1121; GFX9-NEXT: v_mul_f32_e32 v5, v5, v2 1122; GFX9-NEXT: v_cvt_u32_f32_e32 v5, v5 1123; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 1124; GFX9-NEXT: s_sub_i32 s8, 0, s7 1125; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 1126; GFX9-NEXT: v_mul_lo_u32 v6, s8, v5 1127; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 1128; GFX9-NEXT: v_subrev_u32_e32 v1, s4, v1 1129; GFX9-NEXT: s_ashr_i32 s4, s15, 31 1130; GFX9-NEXT: s_add_i32 s9, s15, s4 1131; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 1132; GFX9-NEXT: s_xor_b32 s9, s9, s4 1133; GFX9-NEXT: v_cvt_f32_u32_e32 v8, s9 1134; GFX9-NEXT: v_subrev_u32_e32 v7, s5, v3 1135; GFX9-NEXT: s_ashr_i32 s5, s10, 31 1136; GFX9-NEXT: s_add_i32 s8, s10, s5 1137; GFX9-NEXT: 
s_xor_b32 s8, s8, s5 1138; GFX9-NEXT: v_add_u32_e32 v5, v5, v6 1139; GFX9-NEXT: v_rcp_iflag_f32_e32 v8, v8 1140; GFX9-NEXT: v_mul_hi_u32 v6, s8, v5 1141; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 1142; GFX9-NEXT: v_xor_b32_e32 v3, s13, v3 1143; GFX9-NEXT: v_mul_f32_e32 v2, v8, v2 1144; GFX9-NEXT: v_mul_lo_u32 v7, v6, s7 1145; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 1146; GFX9-NEXT: v_subrev_u32_e32 v5, s13, v3 1147; GFX9-NEXT: s_xor_b32 s6, s5, s6 1148; GFX9-NEXT: v_sub_u32_e32 v3, s8, v7 1149; GFX9-NEXT: s_sub_i32 s8, 0, s9 1150; GFX9-NEXT: v_mul_lo_u32 v8, s8, v2 1151; GFX9-NEXT: v_add_u32_e32 v7, 1, v6 1152; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 1153; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc 1154; GFX9-NEXT: v_subrev_u32_e32 v7, s7, v3 1155; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 1156; GFX9-NEXT: v_mul_hi_u32 v8, v2, v8 1157; GFX9-NEXT: v_add_u32_e32 v7, 1, v6 1158; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 1159; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc 1160; GFX9-NEXT: v_subrev_u32_e32 v7, s7, v3 1161; GFX9-NEXT: s_ashr_i32 s7, s11, 31 1162; GFX9-NEXT: s_add_i32 s8, s11, s7 1163; GFX9-NEXT: s_xor_b32 s8, s8, s7 1164; GFX9-NEXT: v_add_u32_e32 v2, v2, v8 1165; GFX9-NEXT: v_mul_hi_u32 v8, s8, v2 1166; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 1167; GFX9-NEXT: v_xor_b32_e32 v3, s5, v3 1168; GFX9-NEXT: v_xor_b32_e32 v2, s6, v6 1169; GFX9-NEXT: v_mul_lo_u32 v7, v8, s9 1170; GFX9-NEXT: v_subrev_u32_e32 v6, s5, v3 1171; GFX9-NEXT: s_xor_b32 s4, s7, s4 1172; GFX9-NEXT: v_subrev_u32_e32 v2, s6, v2 1173; GFX9-NEXT: v_sub_u32_e32 v3, s8, v7 1174; GFX9-NEXT: v_add_u32_e32 v7, 1, v8 1175; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v3 1176; GFX9-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc 1177; GFX9-NEXT: v_subrev_u32_e32 v8, s9, v3 1178; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc 1179; GFX9-NEXT: v_add_u32_e32 v8, 1, v7 1180; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s9, v3 1181; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 1182; GFX9-NEXT: v_subrev_u32_e32 v8, s9, v3 1183; 
GFX9-NEXT: v_cndmask_b32_e32 v8, v3, v8, vcc 1184; GFX9-NEXT: v_xor_b32_e32 v3, s4, v7 1185; GFX9-NEXT: v_xor_b32_e32 v7, s7, v8 1186; GFX9-NEXT: v_subrev_u32_e32 v3, s4, v3 1187; GFX9-NEXT: v_mov_b32_e32 v8, 0 1188; GFX9-NEXT: v_subrev_u32_e32 v7, s7, v7 1189; GFX9-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] 1190; GFX9-NEXT: global_store_dwordx4 v8, v[4:7], s[2:3] 1191; GFX9-NEXT: s_endpgm 1192; 1193; GFX10-LABEL: sdivrem_v4i32: 1194; GFX10: ; %bb.0: 1195; GFX10-NEXT: s_clause 0x1 1196; GFX10-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x20 1197; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 1198; GFX10-NEXT: v_mov_b32_e32 v4, 0x4f7ffffe 1199; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1200; GFX10-NEXT: s_ashr_i32 s12, s8, 31 1201; GFX10-NEXT: s_ashr_i32 s14, s10, 31 1202; GFX10-NEXT: s_add_i32 s6, s8, s12 1203; GFX10-NEXT: s_add_i32 s8, s10, s14 1204; GFX10-NEXT: s_xor_b32 s10, s6, s12 1205; GFX10-NEXT: s_ashr_i32 s13, s9, 31 1206; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s10 1207; GFX10-NEXT: s_ashr_i32 s15, s11, 31 1208; GFX10-NEXT: s_add_i32 s7, s9, s13 1209; GFX10-NEXT: s_add_i32 s9, s11, s15 1210; GFX10-NEXT: s_xor_b32 s11, s7, s13 1211; GFX10-NEXT: s_xor_b32 s8, s8, s14 1212; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 1213; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s11 1214; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s8 1215; GFX10-NEXT: s_xor_b32 s9, s9, s15 1216; GFX10-NEXT: s_sub_i32 s6, 0, s10 1217; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s9 1218; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 1219; GFX10-NEXT: v_rcp_iflag_f32_e32 v2, v2 1220; GFX10-NEXT: s_sub_i32 s7, 0, s11 1221; GFX10-NEXT: s_sub_i32 s19, 0, s8 1222; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 1223; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1224; GFX10-NEXT: s_ashr_i32 s16, s0, 31 1225; GFX10-NEXT: s_ashr_i32 s17, s1, 31 1226; GFX10-NEXT: s_add_i32 s0, s0, s16 1227; GFX10-NEXT: s_ashr_i32 s18, s2, 31 1228; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 1229; GFX10-NEXT: v_mul_f32_e32 v1, v1, v4 1230; GFX10-NEXT: v_mul_f32_e32 v2, v2, v4 1231; 
GFX10-NEXT: s_xor_b32 s0, s0, s16 1232; GFX10-NEXT: v_mul_f32_e32 v3, v3, v4 1233; GFX10-NEXT: v_mul_lo_u32 v4, s6, v0 1234; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 1235; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2 1236; GFX10-NEXT: s_sub_i32 s6, 0, s9 1237; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 1238; GFX10-NEXT: s_add_i32 s1, s1, s17 1239; GFX10-NEXT: v_mul_lo_u32 v5, s7, v1 1240; GFX10-NEXT: v_mul_lo_u32 v6, s19, v2 1241; GFX10-NEXT: v_mul_hi_u32 v4, v0, v4 1242; GFX10-NEXT: v_mul_lo_u32 v7, s6, v3 1243; GFX10-NEXT: s_add_i32 s2, s2, s18 1244; GFX10-NEXT: s_ashr_i32 s19, s3, 31 1245; GFX10-NEXT: s_xor_b32 s1, s1, s17 1246; GFX10-NEXT: s_xor_b32 s2, s2, s18 1247; GFX10-NEXT: v_mul_hi_u32 v5, v1, v5 1248; GFX10-NEXT: v_mul_hi_u32 v6, v2, v6 1249; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v4 1250; GFX10-NEXT: v_mul_hi_u32 v7, v3, v7 1251; GFX10-NEXT: s_add_i32 s3, s3, s19 1252; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 1253; GFX10-NEXT: s_xor_b32 s3, s3, s19 1254; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 1255; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v5 1256; GFX10-NEXT: v_add_nc_u32_e32 v2, v2, v6 1257; GFX10-NEXT: s_xor_b32 s12, s16, s12 1258; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v7 1259; GFX10-NEXT: s_xor_b32 s13, s17, s13 1260; GFX10-NEXT: v_mul_hi_u32 v1, s1, v1 1261; GFX10-NEXT: v_mul_hi_u32 v2, s2, v2 1262; GFX10-NEXT: v_mul_lo_u32 v4, v0, s10 1263; GFX10-NEXT: v_mul_hi_u32 v3, s3, v3 1264; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v0 1265; GFX10-NEXT: s_xor_b32 s14, s18, s14 1266; GFX10-NEXT: v_mul_lo_u32 v5, v1, s11 1267; GFX10-NEXT: v_mul_lo_u32 v6, v2, s8 1268; GFX10-NEXT: v_sub_nc_u32_e32 v4, s0, v4 1269; GFX10-NEXT: v_mul_lo_u32 v7, v3, s9 1270; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v1 1271; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v2 1272; GFX10-NEXT: v_add_nc_u32_e32 v11, 1, v3 1273; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s10, v4 1274; GFX10-NEXT: v_sub_nc_u32_e32 v5, s1, v5 1275; GFX10-NEXT: v_sub_nc_u32_e32 v6, s2, v6 1276; GFX10-NEXT: v_sub_nc_u32_e32 v7, s3, v7 1277; GFX10-NEXT: 
v_cndmask_b32_e32 v0, v0, v8, vcc_lo 1278; GFX10-NEXT: v_subrev_nc_u32_e32 v8, s10, v4 1279; GFX10-NEXT: v_cmp_le_u32_e64 s0, s11, v5 1280; GFX10-NEXT: v_cmp_le_u32_e64 s1, s8, v6 1281; GFX10-NEXT: v_cmp_le_u32_e64 s2, s9, v7 1282; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc_lo 1283; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v9, s0 1284; GFX10-NEXT: v_subrev_nc_u32_e32 v9, s11, v5 1285; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s1 1286; GFX10-NEXT: v_subrev_nc_u32_e32 v10, s8, v6 1287; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s2 1288; GFX10-NEXT: v_subrev_nc_u32_e32 v11, s9, v7 1289; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v0 1290; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s10, v4 1291; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v9, s0 1292; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v10, s1 1293; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v11, s2 1294; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v1 1295; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 1296; GFX10-NEXT: v_subrev_nc_u32_e32 v8, s10, v4 1297; GFX10-NEXT: v_cmp_le_u32_e64 s0, s11, v5 1298; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v2 1299; GFX10-NEXT: v_cmp_le_u32_e64 s1, s8, v6 1300; GFX10-NEXT: v_add_nc_u32_e32 v11, 1, v3 1301; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc_lo 1302; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s9, v7 1303; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v9, s0 1304; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s1 1305; GFX10-NEXT: v_subrev_nc_u32_e32 v9, s11, v5 1306; GFX10-NEXT: v_subrev_nc_u32_e32 v10, s8, v6 1307; GFX10-NEXT: v_subrev_nc_u32_e32 v12, s9, v7 1308; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc_lo 1309; GFX10-NEXT: v_xor_b32_e32 v0, s12, v0 1310; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v9, s0 1311; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v10, s1 1312; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v12, vcc_lo 1313; GFX10-NEXT: s_xor_b32 s0, s19, s15 1314; GFX10-NEXT: v_xor_b32_e32 v1, s13, v1 1315; GFX10-NEXT: v_xor_b32_e32 v2, s14, v2 1316; GFX10-NEXT: v_xor_b32_e32 v3, s0, v3 1317; GFX10-NEXT: v_xor_b32_e32 v4, 
s16, v4 1318; GFX10-NEXT: v_xor_b32_e32 v5, s17, v5 1319; GFX10-NEXT: v_xor_b32_e32 v6, s18, v6 1320; GFX10-NEXT: v_xor_b32_e32 v7, s19, v7 1321; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s12, v0 1322; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s13, v1 1323; GFX10-NEXT: v_subrev_nc_u32_e32 v2, s14, v2 1324; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s0, v3 1325; GFX10-NEXT: v_mov_b32_e32 v8, 0 1326; GFX10-NEXT: v_subrev_nc_u32_e32 v4, s16, v4 1327; GFX10-NEXT: v_subrev_nc_u32_e32 v5, s17, v5 1328; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s18, v6 1329; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s19, v7 1330; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1331; GFX10-NEXT: global_store_dwordx4 v8, v[0:3], s[4:5] 1332; GFX10-NEXT: global_store_dwordx4 v8, v[4:7], s[6:7] 1333; GFX10-NEXT: s_endpgm 1334 %div = sdiv <4 x i32> %x, %y 1335 store <4 x i32> %div, <4 x i32> addrspace(1)* %out0 1336 %rem = srem <4 x i32> %x, %y 1337 store <4 x i32> %rem, <4 x i32> addrspace(1)* %out1 1338 ret void 1339} 1340 1341define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64> addrspace(1)* %out1, <2 x i64> %x, <2 x i64> %y) { 1342; GFX8-LABEL: sdivrem_v2i64: 1343; GFX8: ; %bb.0: 1344; GFX8-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x10 1345; GFX8-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x20 1346; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1347; GFX8-NEXT: s_ashr_i32 s2, s13, 31 1348; GFX8-NEXT: s_ashr_i32 s6, s9, 31 1349; GFX8-NEXT: s_add_u32 s0, s12, s2 1350; GFX8-NEXT: s_cselect_b32 s1, 1, 0 1351; GFX8-NEXT: s_and_b32 s1, s1, 1 1352; GFX8-NEXT: s_cmp_lg_u32 s1, 0 1353; GFX8-NEXT: s_addc_u32 s1, s13, s2 1354; GFX8-NEXT: s_add_u32 s8, s8, s6 1355; GFX8-NEXT: s_cselect_b32 s3, 1, 0 1356; GFX8-NEXT: s_and_b32 s3, s3, 1 1357; GFX8-NEXT: s_cmp_lg_u32 s3, 0 1358; GFX8-NEXT: s_mov_b32 s7, s6 1359; GFX8-NEXT: s_addc_u32 s9, s9, s6 1360; GFX8-NEXT: s_xor_b64 s[8:9], s[8:9], s[6:7] 1361; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s9 1362; GFX8-NEXT: v_cvt_f32_u32_e32 v1, s8 1363; GFX8-NEXT: s_mov_b32 s3, s2 1364; GFX8-NEXT: s_xor_b64 
s[12:13], s[0:1], s[2:3] 1365; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 1366; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 1367; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 1368; GFX8-NEXT: s_sub_u32 s16, 0, s8 1369; GFX8-NEXT: s_cselect_b32 s0, 1, 0 1370; GFX8-NEXT: s_and_b32 s0, s0, 1 1371; GFX8-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 1372; GFX8-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 1373; GFX8-NEXT: v_trunc_f32_e32 v1, v1 1374; GFX8-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 1375; GFX8-NEXT: v_add_f32_e32 v0, v2, v0 1376; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 1377; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 1378; GFX8-NEXT: s_cmp_lg_u32 s0, 0 1379; GFX8-NEXT: s_subb_u32 s17, 0, s9 1380; GFX8-NEXT: v_mul_lo_u32 v3, s17, v0 1381; GFX8-NEXT: v_mul_lo_u32 v2, s16, v1 1382; GFX8-NEXT: v_mul_hi_u32 v5, s16, v0 1383; GFX8-NEXT: v_mul_lo_u32 v4, s16, v0 1384; GFX8-NEXT: v_mov_b32_e32 v6, s9 1385; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 1386; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 1387; GFX8-NEXT: v_mul_lo_u32 v3, v1, v4 1388; GFX8-NEXT: v_mul_lo_u32 v5, v0, v2 1389; GFX8-NEXT: v_mul_hi_u32 v7, v0, v4 1390; GFX8-NEXT: v_mul_hi_u32 v4, v1, v4 1391; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 1392; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1393; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v7 1394; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1395; GFX8-NEXT: v_mul_lo_u32 v7, v1, v2 1396; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 1397; GFX8-NEXT: v_mul_hi_u32 v5, v0, v2 1398; GFX8-NEXT: v_mul_hi_u32 v2, v1, v2 1399; GFX8-NEXT: v_add_u32_e32 v4, vcc, v7, v4 1400; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1401; GFX8-NEXT: v_add_u32_e32 v4, vcc, v4, v5 1402; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1403; GFX8-NEXT: v_add_u32_e32 v5, vcc, v7, v5 1404; GFX8-NEXT: v_add_u32_e32 v3, vcc, v4, v3 1405; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1406; GFX8-NEXT: v_add_u32_e32 v4, vcc, v5, v4 1407; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4 1408; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 1409; GFX8-NEXT: 
v_addc_u32_e64 v3, s[0:1], v1, v2, vcc 1410; GFX8-NEXT: v_mul_lo_u32 v4, s17, v0 1411; GFX8-NEXT: v_mul_lo_u32 v5, s16, v3 1412; GFX8-NEXT: v_mul_hi_u32 v8, s16, v0 1413; GFX8-NEXT: v_mul_lo_u32 v7, s16, v0 1414; GFX8-NEXT: v_add_u32_e64 v1, s[0:1], v1, v2 1415; GFX8-NEXT: v_add_u32_e64 v4, s[0:1], v4, v5 1416; GFX8-NEXT: v_add_u32_e64 v4, s[0:1], v4, v8 1417; GFX8-NEXT: v_mul_lo_u32 v5, v3, v7 1418; GFX8-NEXT: v_mul_lo_u32 v8, v0, v4 1419; GFX8-NEXT: v_mul_hi_u32 v2, v0, v7 1420; GFX8-NEXT: v_mul_hi_u32 v7, v3, v7 1421; GFX8-NEXT: v_add_u32_e64 v5, s[0:1], v5, v8 1422; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[0:1] 1423; GFX8-NEXT: v_add_u32_e64 v2, s[0:1], v5, v2 1424; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] 1425; GFX8-NEXT: v_mul_lo_u32 v5, v3, v4 1426; GFX8-NEXT: v_add_u32_e64 v2, s[0:1], v8, v2 1427; GFX8-NEXT: v_mul_hi_u32 v8, v0, v4 1428; GFX8-NEXT: v_mul_hi_u32 v3, v3, v4 1429; GFX8-NEXT: v_add_u32_e64 v5, s[0:1], v5, v7 1430; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[0:1] 1431; GFX8-NEXT: v_add_u32_e64 v5, s[0:1], v5, v8 1432; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[0:1] 1433; GFX8-NEXT: v_add_u32_e64 v7, s[0:1], v7, v8 1434; GFX8-NEXT: v_add_u32_e64 v2, s[0:1], v5, v2 1435; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[0:1] 1436; GFX8-NEXT: v_add_u32_e64 v4, s[0:1], v7, v5 1437; GFX8-NEXT: v_add_u32_e64 v3, s[0:1], v3, v4 1438; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc 1439; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1440; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1441; GFX8-NEXT: v_mul_lo_u32 v2, s13, v0 1442; GFX8-NEXT: v_mul_lo_u32 v3, s12, v1 1443; GFX8-NEXT: v_mul_hi_u32 v5, s12, v0 1444; GFX8-NEXT: v_mul_hi_u32 v0, s13, v0 1445; GFX8-NEXT: v_mov_b32_e32 v4, s13 1446; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 1447; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1448; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v5 1449; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1450; GFX8-NEXT: v_mul_lo_u32 v5, s13, v1 1451; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 1452; 
GFX8-NEXT: v_mul_hi_u32 v3, s12, v1 1453; GFX8-NEXT: v_mul_hi_u32 v1, s13, v1 1454; GFX8-NEXT: v_add_u32_e32 v0, vcc, v5, v0 1455; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1456; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v3 1457; GFX8-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1458; GFX8-NEXT: v_add_u32_e32 v3, vcc, v5, v3 1459; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1460; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1461; GFX8-NEXT: v_add_u32_e32 v2, vcc, v3, v2 1462; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v2 1463; GFX8-NEXT: v_mul_lo_u32 v2, s9, v0 1464; GFX8-NEXT: v_mul_lo_u32 v3, s8, v1 1465; GFX8-NEXT: v_mul_hi_u32 v7, s8, v0 1466; GFX8-NEXT: v_mul_lo_u32 v5, s8, v0 1467; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 1468; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 1469; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s12, v5 1470; GFX8-NEXT: v_subb_u32_e64 v4, s[0:1], v4, v2, vcc 1471; GFX8-NEXT: v_sub_u32_e64 v2, s[0:1], s13, v2 1472; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v4 1473; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[0:1] 1474; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v3 1475; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] 1476; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v4 1477; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc 1478; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[0:1] 1479; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s8, v3 1480; GFX8-NEXT: v_subbrev_u32_e64 v8, s[0:1], 0, v2, vcc 1481; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], 1, v0 1482; GFX8-NEXT: v_addc_u32_e64 v10, s[0:1], 0, v1, s[0:1] 1483; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v8 1484; GFX8-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc 1485; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] 1486; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7 1487; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s8, v7 1488; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] 1489; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v8 1490; GFX8-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] 1491; GFX8-NEXT: v_add_u32_e64 v12, s[0:1], 1, v9 1492; GFX8-NEXT: v_subbrev_u32_e32 v2, 
vcc, 0, v2, vcc 1493; GFX8-NEXT: v_addc_u32_e64 v13, s[0:1], 0, v10, s[0:1] 1494; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 1495; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v5 1496; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v6, vcc 1497; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc 1498; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc 1499; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc 1500; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1] 1501; GFX8-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[0:1] 1502; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[0:1] 1503; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v10, s[0:1] 1504; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[6:7] 1505; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 1506; GFX8-NEXT: s_ashr_i32 s6, s15, 31 1507; GFX8-NEXT: s_ashr_i32 s8, s11, 31 1508; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 1509; GFX8-NEXT: s_add_u32 s0, s14, s6 1510; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1 1511; GFX8-NEXT: v_mov_b32_e32 v4, s1 1512; GFX8-NEXT: s_cselect_b32 s1, 1, 0 1513; GFX8-NEXT: s_and_b32 s1, s1, 1 1514; GFX8-NEXT: s_cmp_lg_u32 s1, 0 1515; GFX8-NEXT: s_addc_u32 s1, s15, s6 1516; GFX8-NEXT: s_add_u32 s10, s10, s8 1517; GFX8-NEXT: s_cselect_b32 s3, 1, 0 1518; GFX8-NEXT: s_and_b32 s3, s3, 1 1519; GFX8-NEXT: s_cmp_lg_u32 s3, 0 1520; GFX8-NEXT: s_mov_b32 s9, s8 1521; GFX8-NEXT: s_addc_u32 s11, s11, s8 1522; GFX8-NEXT: s_xor_b64 s[10:11], s[10:11], s[8:9] 1523; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc 1524; GFX8-NEXT: v_cvt_f32_u32_e32 v4, s11 1525; GFX8-NEXT: v_cvt_f32_u32_e32 v5, s10 1526; GFX8-NEXT: v_xor_b32_e32 v3, s2, v3 1527; GFX8-NEXT: v_xor_b32_e32 v2, s2, v2 1528; GFX8-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 1529; GFX8-NEXT: v_add_f32_e32 v4, v4, v5 1530; GFX8-NEXT: v_rcp_iflag_f32_e32 v7, v4 1531; GFX8-NEXT: v_mov_b32_e32 v6, s2 1532; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s2, v3 1533; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v2, v6, vcc 1534; GFX8-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v7 1535; GFX8-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 1536; GFX8-NEXT: s_mov_b32 
s7, s6 1537; GFX8-NEXT: v_trunc_f32_e32 v3, v3 1538; GFX8-NEXT: v_mul_f32_e32 v6, 0xcf800000, v3 1539; GFX8-NEXT: s_xor_b64 s[2:3], s[0:1], s[6:7] 1540; GFX8-NEXT: v_add_f32_e32 v2, v6, v2 1541; GFX8-NEXT: s_sub_u32 s12, 0, s10 1542; GFX8-NEXT: s_cselect_b32 s0, 1, 0 1543; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2 1544; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 1545; GFX8-NEXT: s_and_b32 s0, s0, 1 1546; GFX8-NEXT: s_cmp_lg_u32 s0, 0 1547; GFX8-NEXT: s_subb_u32 s13, 0, s11 1548; GFX8-NEXT: v_mul_lo_u32 v6, s13, v2 1549; GFX8-NEXT: v_mul_lo_u32 v7, s12, v3 1550; GFX8-NEXT: v_mul_hi_u32 v9, s12, v2 1551; GFX8-NEXT: v_mul_lo_u32 v8, s12, v2 1552; GFX8-NEXT: v_mov_b32_e32 v10, s11 1553; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7 1554; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v9 1555; GFX8-NEXT: v_mul_lo_u32 v7, v3, v8 1556; GFX8-NEXT: v_mul_lo_u32 v9, v2, v6 1557; GFX8-NEXT: v_mul_hi_u32 v11, v2, v8 1558; GFX8-NEXT: v_mul_hi_u32 v8, v3, v8 1559; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v9 1560; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1561; GFX8-NEXT: v_add_u32_e32 v7, vcc, v7, v11 1562; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1563; GFX8-NEXT: v_mul_lo_u32 v11, v3, v6 1564; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 1565; GFX8-NEXT: v_mul_hi_u32 v9, v2, v6 1566; GFX8-NEXT: v_mul_hi_u32 v6, v3, v6 1567; GFX8-NEXT: v_add_u32_e32 v8, vcc, v11, v8 1568; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc 1569; GFX8-NEXT: v_add_u32_e32 v8, vcc, v8, v9 1570; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1571; GFX8-NEXT: v_add_u32_e32 v9, vcc, v11, v9 1572; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7 1573; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1574; GFX8-NEXT: v_add_u32_e32 v8, vcc, v9, v8 1575; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 1576; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 1577; GFX8-NEXT: v_addc_u32_e64 v7, s[0:1], v3, v6, vcc 1578; GFX8-NEXT: v_mul_lo_u32 v8, s13, v2 1579; GFX8-NEXT: v_mul_lo_u32 v9, s12, v7 1580; GFX8-NEXT: v_mul_hi_u32 v12, s12, v2 1581; GFX8-NEXT: v_mul_lo_u32 v11, s12, 
v2 1582; GFX8-NEXT: v_add_u32_e64 v3, s[0:1], v3, v6 1583; GFX8-NEXT: v_add_u32_e64 v8, s[0:1], v8, v9 1584; GFX8-NEXT: v_add_u32_e64 v8, s[0:1], v8, v12 1585; GFX8-NEXT: v_mul_lo_u32 v9, v7, v11 1586; GFX8-NEXT: v_mul_lo_u32 v12, v2, v8 1587; GFX8-NEXT: v_mul_hi_u32 v6, v2, v11 1588; GFX8-NEXT: v_mul_hi_u32 v11, v7, v11 1589; GFX8-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x0 1590; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], v9, v12 1591; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[0:1] 1592; GFX8-NEXT: v_add_u32_e64 v6, s[0:1], v9, v6 1593; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] 1594; GFX8-NEXT: v_mul_lo_u32 v9, v7, v8 1595; GFX8-NEXT: v_add_u32_e64 v6, s[0:1], v12, v6 1596; GFX8-NEXT: v_mul_hi_u32 v12, v2, v8 1597; GFX8-NEXT: v_mul_hi_u32 v7, v7, v8 1598; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], v9, v11 1599; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[0:1] 1600; GFX8-NEXT: v_add_u32_e64 v9, s[0:1], v9, v12 1601; GFX8-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[0:1] 1602; GFX8-NEXT: v_add_u32_e64 v11, s[0:1], v11, v12 1603; GFX8-NEXT: v_add_u32_e64 v6, s[0:1], v9, v6 1604; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[0:1] 1605; GFX8-NEXT: v_add_u32_e64 v8, s[0:1], v11, v9 1606; GFX8-NEXT: v_add_u32_e64 v7, s[0:1], v7, v8 1607; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc 1608; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 1609; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 1610; GFX8-NEXT: v_mul_lo_u32 v6, s3, v2 1611; GFX8-NEXT: v_mul_lo_u32 v7, s2, v3 1612; GFX8-NEXT: v_mul_hi_u32 v9, s2, v2 1613; GFX8-NEXT: v_mul_hi_u32 v2, s3, v2 1614; GFX8-NEXT: v_mov_b32_e32 v8, s3 1615; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7 1616; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1617; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v9 1618; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1619; GFX8-NEXT: v_mul_lo_u32 v9, s3, v3 1620; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 1621; GFX8-NEXT: v_mul_hi_u32 v7, s2, v3 1622; GFX8-NEXT: v_mul_hi_u32 v3, s3, v3 1623; GFX8-NEXT: v_add_u32_e32 v2, vcc, v9, v2 1624; 
GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1625; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v7 1626; GFX8-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 1627; GFX8-NEXT: v_add_u32_e32 v7, vcc, v9, v7 1628; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v6 1629; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1630; GFX8-NEXT: v_add_u32_e32 v6, vcc, v7, v6 1631; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v6 1632; GFX8-NEXT: v_mul_lo_u32 v6, s11, v2 1633; GFX8-NEXT: v_mul_lo_u32 v7, s10, v3 1634; GFX8-NEXT: v_mul_hi_u32 v11, s10, v2 1635; GFX8-NEXT: v_mul_lo_u32 v9, s10, v2 1636; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7 1637; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v11 1638; GFX8-NEXT: v_sub_u32_e32 v7, vcc, s2, v9 1639; GFX8-NEXT: v_subb_u32_e64 v8, s[0:1], v8, v6, vcc 1640; GFX8-NEXT: v_sub_u32_e64 v6, s[0:1], s3, v6 1641; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v8 1642; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[0:1] 1643; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v7 1644; GFX8-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] 1645; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v8 1646; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v6, v10, vcc 1647; GFX8-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[0:1] 1648; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, s10, v7 1649; GFX8-NEXT: v_subbrev_u32_e64 v12, s[0:1], 0, v6, vcc 1650; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v12 1651; GFX8-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] 1652; GFX8-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v11 1653; GFX8-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[0:1] 1654; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v12 1655; GFX8-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[0:1] 1656; GFX8-NEXT: v_add_u32_e64 v14, s[0:1], 1, v2 1657; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v6, v10, vcc 1658; GFX8-NEXT: v_addc_u32_e64 v15, s[0:1], 0, v3, s[0:1] 1659; GFX8-NEXT: v_add_u32_e32 v10, vcc, 1, v14 1660; GFX8-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc 1661; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 1662; GFX8-NEXT: v_subrev_u32_e64 v13, s[0:1], s10, v11 1663; GFX8-NEXT: v_subbrev_u32_e64 v6, 
s[0:1], 0, v6, s[0:1] 1664; GFX8-NEXT: v_cndmask_b32_e32 v10, v14, v10, vcc 1665; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v9 1666; GFX8-NEXT: v_cndmask_b32_e32 v6, v12, v6, vcc 1667; GFX8-NEXT: v_cndmask_b32_e32 v14, v15, v16, vcc 1668; GFX8-NEXT: v_cndmask_b32_e32 v9, v11, v13, vcc 1669; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[0:1] 1670; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[0:1] 1671; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[0:1] 1672; GFX8-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[0:1] 1673; GFX8-NEXT: s_xor_b64 s[0:1], s[6:7], s[8:9] 1674; GFX8-NEXT: v_xor_b32_e32 v2, s0, v2 1675; GFX8-NEXT: v_xor_b32_e32 v3, s1, v3 1676; GFX8-NEXT: v_mov_b32_e32 v8, s1 1677; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s0, v2 1678; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc 1679; GFX8-NEXT: v_xor_b32_e32 v7, s6, v7 1680; GFX8-NEXT: v_xor_b32_e32 v8, s6, v6 1681; GFX8-NEXT: v_mov_b32_e32 v9, s6 1682; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s6, v7 1683; GFX8-NEXT: v_subb_u32_e32 v7, vcc, v8, v9, vcc 1684; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1685; GFX8-NEXT: v_mov_b32_e32 v8, s12 1686; GFX8-NEXT: v_mov_b32_e32 v9, s13 1687; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 1688; GFX8-NEXT: s_nop 0 1689; GFX8-NEXT: v_mov_b32_e32 v0, s14 1690; GFX8-NEXT: v_mov_b32_e32 v1, s15 1691; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 1692; GFX8-NEXT: s_endpgm 1693; 1694; GFX9-LABEL: sdivrem_v2i64: 1695; GFX9: ; %bb.0: 1696; GFX9-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x10 1697; GFX9-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x20 1698; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1699; GFX9-NEXT: s_ashr_i32 s2, s13, 31 1700; GFX9-NEXT: s_ashr_i32 s6, s9, 31 1701; GFX9-NEXT: s_add_u32 s0, s12, s2 1702; GFX9-NEXT: s_cselect_b32 s1, 1, 0 1703; GFX9-NEXT: s_and_b32 s1, s1, 1 1704; GFX9-NEXT: s_cmp_lg_u32 s1, 0 1705; GFX9-NEXT: s_addc_u32 s1, s13, s2 1706; GFX9-NEXT: s_add_u32 s8, s8, s6 1707; GFX9-NEXT: s_cselect_b32 s3, 1, 0 1708; GFX9-NEXT: s_and_b32 s3, s3, 1 1709; GFX9-NEXT: s_cmp_lg_u32 s3, 0 1710; GFX9-NEXT: 
s_mov_b32 s7, s6 1711; GFX9-NEXT: s_addc_u32 s9, s9, s6 1712; GFX9-NEXT: s_xor_b64 s[8:9], s[8:9], s[6:7] 1713; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s9 1714; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s8 1715; GFX9-NEXT: s_mov_b32 s3, s2 1716; GFX9-NEXT: s_xor_b64 s[12:13], s[0:1], s[2:3] 1717; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 1718; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 1719; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 1720; GFX9-NEXT: s_sub_u32 s16, 0, s8 1721; GFX9-NEXT: s_cselect_b32 s0, 1, 0 1722; GFX9-NEXT: s_and_b32 s0, s0, 1 1723; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 1724; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 1725; GFX9-NEXT: v_trunc_f32_e32 v1, v1 1726; GFX9-NEXT: v_mul_f32_e32 v2, 0xcf800000, v1 1727; GFX9-NEXT: v_add_f32_e32 v0, v2, v0 1728; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 1729; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 1730; GFX9-NEXT: s_cmp_lg_u32 s0, 0 1731; GFX9-NEXT: s_subb_u32 s17, 0, s9 1732; GFX9-NEXT: v_mul_lo_u32 v3, s17, v0 1733; GFX9-NEXT: v_mul_lo_u32 v2, s16, v1 1734; GFX9-NEXT: v_mul_hi_u32 v4, s16, v0 1735; GFX9-NEXT: v_mul_lo_u32 v5, s16, v0 1736; GFX9-NEXT: v_add3_u32 v2, v3, v2, v4 1737; GFX9-NEXT: v_mul_lo_u32 v3, v1, v5 1738; GFX9-NEXT: v_mul_lo_u32 v4, v0, v2 1739; GFX9-NEXT: v_mul_hi_u32 v6, v0, v5 1740; GFX9-NEXT: v_mul_hi_u32 v5, v1, v5 1741; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v4 1742; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1743; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6 1744; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1745; GFX9-NEXT: v_mul_lo_u32 v6, v1, v2 1746; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 1747; GFX9-NEXT: v_mul_hi_u32 v4, v0, v2 1748; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 1749; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v6, v5 1750; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1751; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v5, v4 1752; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1753; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v4, v3 1754; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1755; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 
1756; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 1757; GFX9-NEXT: v_add3_u32 v2, v5, v4, v2 1758; GFX9-NEXT: v_addc_co_u32_e64 v3, s[0:1], v1, v2, vcc 1759; GFX9-NEXT: v_mul_lo_u32 v4, s17, v0 1760; GFX9-NEXT: v_mul_lo_u32 v5, s16, v3 1761; GFX9-NEXT: v_mul_hi_u32 v6, s16, v0 1762; GFX9-NEXT: v_mul_lo_u32 v7, s16, v0 1763; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 1764; GFX9-NEXT: v_add3_u32 v4, v4, v5, v6 1765; GFX9-NEXT: v_mul_lo_u32 v5, v3, v7 1766; GFX9-NEXT: v_mul_lo_u32 v6, v0, v4 1767; GFX9-NEXT: v_mul_hi_u32 v2, v0, v7 1768; GFX9-NEXT: v_mul_hi_u32 v7, v3, v7 1769; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], v5, v6 1770; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] 1771; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v5, v2 1772; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] 1773; GFX9-NEXT: v_mul_lo_u32 v5, v3, v4 1774; GFX9-NEXT: v_add_u32_e32 v2, v6, v2 1775; GFX9-NEXT: v_mul_hi_u32 v6, v0, v4 1776; GFX9-NEXT: v_mul_hi_u32 v3, v3, v4 1777; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], v5, v7 1778; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[0:1] 1779; GFX9-NEXT: v_add_co_u32_e64 v5, s[0:1], v5, v6 1780; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] 1781; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], v5, v2 1782; GFX9-NEXT: v_add_u32_e32 v6, v7, v6 1783; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[0:1] 1784; GFX9-NEXT: v_add3_u32 v3, v6, v4, v3 1785; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc 1786; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 1787; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 1788; GFX9-NEXT: v_mul_lo_u32 v2, s13, v0 1789; GFX9-NEXT: v_mul_lo_u32 v3, s12, v1 1790; GFX9-NEXT: v_mul_hi_u32 v4, s12, v0 1791; GFX9-NEXT: v_mul_hi_u32 v0, s13, v0 1792; GFX9-NEXT: v_mov_b32_e32 v7, s13 1793; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 1794; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1795; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 1796; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1797; GFX9-NEXT: v_mul_lo_u32 v4, s13, v1 1798; GFX9-NEXT: v_add_u32_e32 v2, v3, v2 1799; 
GFX9-NEXT: v_mul_hi_u32 v3, s12, v1 1800; GFX9-NEXT: v_mul_hi_u32 v1, s13, v1 1801; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v4, v0 1802; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 1803; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3 1804; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1805; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 1806; GFX9-NEXT: v_add_u32_e32 v3, v4, v3 1807; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1808; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1 1809; GFX9-NEXT: v_mul_lo_u32 v2, s9, v0 1810; GFX9-NEXT: v_mul_lo_u32 v3, s8, v1 1811; GFX9-NEXT: v_mul_hi_u32 v4, s8, v0 1812; GFX9-NEXT: v_mul_lo_u32 v6, s8, v0 1813; GFX9-NEXT: v_mov_b32_e32 v5, s9 1814; GFX9-NEXT: v_add3_u32 v2, v2, v3, v4 1815; GFX9-NEXT: v_sub_co_u32_e32 v3, vcc, s12, v6 1816; GFX9-NEXT: v_subb_co_u32_e64 v4, s[0:1], v7, v2, vcc 1817; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v4 1818; GFX9-NEXT: v_sub_u32_e32 v2, s13, v2 1819; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[0:1] 1820; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v3 1821; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] 1822; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v4 1823; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v5, vcc 1824; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[0:1] 1825; GFX9-NEXT: v_subrev_co_u32_e32 v7, vcc, s8, v3 1826; GFX9-NEXT: v_subbrev_co_u32_e64 v8, s[0:1], 0, v2, vcc 1827; GFX9-NEXT: v_add_co_u32_e64 v9, s[0:1], 1, v0 1828; GFX9-NEXT: v_addc_co_u32_e64 v10, s[0:1], 0, v1, s[0:1] 1829; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v8 1830; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v5, vcc 1831; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] 1832; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7 1833; GFX9-NEXT: v_subrev_co_u32_e32 v5, vcc, s8, v7 1834; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1] 1835; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v8 1836; GFX9-NEXT: v_cndmask_b32_e64 v11, v11, v12, s[0:1] 1837; GFX9-NEXT: v_add_co_u32_e64 v12, s[0:1], 1, v9 1838; GFX9-NEXT: v_subbrev_co_u32_e32 v2, vcc, 0, v2, vcc 1839; GFX9-NEXT: 
v_cmp_ne_u32_e32 vcc, 0, v11 1840; GFX9-NEXT: v_addc_co_u32_e64 v13, s[0:1], 0, v10, s[0:1] 1841; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc 1842; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6 1843; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc 1844; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc 1845; GFX9-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc 1846; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1] 1847; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[0:1] 1848; GFX9-NEXT: s_ashr_i32 s8, s11, 31 1849; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v9, s[0:1] 1850; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v10, s[0:1] 1851; GFX9-NEXT: s_xor_b64 s[0:1], s[2:3], s[6:7] 1852; GFX9-NEXT: s_ashr_i32 s6, s15, 31 1853; GFX9-NEXT: s_add_u32 s12, s14, s6 1854; GFX9-NEXT: s_cselect_b32 s3, 1, 0 1855; GFX9-NEXT: s_and_b32 s3, s3, 1 1856; GFX9-NEXT: s_cmp_lg_u32 s3, 0 1857; GFX9-NEXT: s_addc_u32 s13, s15, s6 1858; GFX9-NEXT: s_add_u32 s10, s10, s8 1859; GFX9-NEXT: s_cselect_b32 s3, 1, 0 1860; GFX9-NEXT: s_and_b32 s3, s3, 1 1861; GFX9-NEXT: s_cmp_lg_u32 s3, 0 1862; GFX9-NEXT: s_mov_b32 s9, s8 1863; GFX9-NEXT: s_addc_u32 s11, s11, s8 1864; GFX9-NEXT: s_xor_b64 s[10:11], s[10:11], s[8:9] 1865; GFX9-NEXT: v_cvt_f32_u32_e32 v4, s11 1866; GFX9-NEXT: v_cvt_f32_u32_e32 v5, s10 1867; GFX9-NEXT: s_mov_b32 s7, s6 1868; GFX9-NEXT: s_xor_b64 s[12:13], s[12:13], s[6:7] 1869; GFX9-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 1870; GFX9-NEXT: v_add_f32_e32 v4, v4, v5 1871; GFX9-NEXT: v_rcp_iflag_f32_e32 v4, v4 1872; GFX9-NEXT: s_sub_u32 s3, 0, s10 1873; GFX9-NEXT: v_xor_b32_e32 v1, s1, v1 1874; GFX9-NEXT: v_mov_b32_e32 v5, s1 1875; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 1876; GFX9-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 1877; GFX9-NEXT: v_trunc_f32_e32 v6, v6 1878; GFX9-NEXT: v_mul_f32_e32 v7, 0xcf800000, v6 1879; GFX9-NEXT: v_add_f32_e32 v4, v7, v4 1880; GFX9-NEXT: s_cselect_b32 s1, 1, 0 1881; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v4 1882; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 1883; GFX9-NEXT: s_and_b32 s1, s1, 1 1884; 
GFX9-NEXT: s_cmp_lg_u32 s1, 0 1885; GFX9-NEXT: s_subb_u32 s14, 0, s11 1886; GFX9-NEXT: v_mul_lo_u32 v8, s14, v4 1887; GFX9-NEXT: v_mul_lo_u32 v9, s3, v6 1888; GFX9-NEXT: v_mul_hi_u32 v10, s3, v4 1889; GFX9-NEXT: v_mul_lo_u32 v7, s3, v4 1890; GFX9-NEXT: v_xor_b32_e32 v0, s0, v0 1891; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s0, v0 1892; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v5, vcc 1893; GFX9-NEXT: v_add3_u32 v5, v8, v9, v10 1894; GFX9-NEXT: v_mul_lo_u32 v8, v6, v7 1895; GFX9-NEXT: v_mul_lo_u32 v9, v4, v5 1896; GFX9-NEXT: v_mul_hi_u32 v10, v4, v7 1897; GFX9-NEXT: v_mul_hi_u32 v7, v6, v7 1898; GFX9-NEXT: v_xor_b32_e32 v3, s2, v3 1899; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v9 1900; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1901; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v10 1902; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1903; GFX9-NEXT: v_mul_lo_u32 v10, v6, v5 1904; GFX9-NEXT: v_add_u32_e32 v8, v9, v8 1905; GFX9-NEXT: v_mul_hi_u32 v9, v4, v5 1906; GFX9-NEXT: v_mul_hi_u32 v5, v6, v5 1907; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v10, v7 1908; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc 1909; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v9 1910; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 1911; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8 1912; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1913; GFX9-NEXT: v_add_u32_e32 v9, v10, v9 1914; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v7 1915; GFX9-NEXT: v_add3_u32 v5, v9, v8, v5 1916; GFX9-NEXT: v_addc_co_u32_e64 v7, s[0:1], v6, v5, vcc 1917; GFX9-NEXT: v_mul_lo_u32 v8, s14, v4 1918; GFX9-NEXT: v_mul_lo_u32 v9, s3, v7 1919; GFX9-NEXT: v_mul_hi_u32 v10, s3, v4 1920; GFX9-NEXT: v_mul_lo_u32 v11, s3, v4 1921; GFX9-NEXT: v_add_u32_e32 v5, v6, v5 1922; GFX9-NEXT: v_xor_b32_e32 v2, s2, v2 1923; GFX9-NEXT: v_add3_u32 v8, v8, v9, v10 1924; GFX9-NEXT: v_mul_lo_u32 v9, v7, v11 1925; GFX9-NEXT: v_mul_lo_u32 v10, v4, v8 1926; GFX9-NEXT: v_mul_hi_u32 v6, v4, v11 1927; GFX9-NEXT: v_mul_hi_u32 v11, v7, v11 1928; GFX9-NEXT: v_mov_b32_e32 v12, s2 
1929; GFX9-NEXT: v_add_co_u32_e64 v9, s[0:1], v9, v10 1930; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[0:1] 1931; GFX9-NEXT: v_add_co_u32_e64 v6, s[0:1], v9, v6 1932; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] 1933; GFX9-NEXT: v_mul_lo_u32 v9, v7, v8 1934; GFX9-NEXT: v_add_u32_e32 v6, v10, v6 1935; GFX9-NEXT: v_mul_hi_u32 v10, v4, v8 1936; GFX9-NEXT: v_mul_hi_u32 v7, v7, v8 1937; GFX9-NEXT: v_add_co_u32_e64 v9, s[0:1], v9, v11 1938; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[0:1] 1939; GFX9-NEXT: v_add_co_u32_e64 v9, s[0:1], v9, v10 1940; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[0:1] 1941; GFX9-NEXT: v_add_co_u32_e64 v6, s[0:1], v9, v6 1942; GFX9-NEXT: v_add_u32_e32 v10, v11, v10 1943; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[0:1] 1944; GFX9-NEXT: v_add3_u32 v7, v10, v8, v7 1945; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v5, v7, vcc 1946; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v4, v6 1947; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v5, vcc 1948; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s2, v3 1949; GFX9-NEXT: v_mul_lo_u32 v8, s13, v6 1950; GFX9-NEXT: v_mul_lo_u32 v9, s12, v7 1951; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v2, v12, vcc 1952; GFX9-NEXT: v_mul_hi_u32 v2, s12, v6 1953; GFX9-NEXT: v_mul_hi_u32 v6, s13, v6 1954; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v8, v9 1955; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1956; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 1957; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1958; GFX9-NEXT: v_mul_lo_u32 v3, s13, v7 1959; GFX9-NEXT: v_add_u32_e32 v2, v8, v2 1960; GFX9-NEXT: v_mul_hi_u32 v8, s12, v7 1961; GFX9-NEXT: v_mul_hi_u32 v7, s13, v7 1962; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v6 1963; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1964; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v8 1965; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1966; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v3, v2 1967; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 1968; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1969; GFX9-NEXT: v_add3_u32 v3, v6, v3, v7 1970; GFX9-NEXT: 
v_mul_lo_u32 v6, s11, v2 1971; GFX9-NEXT: v_mul_lo_u32 v7, s10, v3 1972; GFX9-NEXT: v_mul_hi_u32 v8, s10, v2 1973; GFX9-NEXT: v_mul_lo_u32 v10, s10, v2 1974; GFX9-NEXT: v_mov_b32_e32 v11, s13 1975; GFX9-NEXT: v_mov_b32_e32 v9, s11 1976; GFX9-NEXT: v_add3_u32 v6, v6, v7, v8 1977; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, s12, v10 1978; GFX9-NEXT: v_subb_co_u32_e64 v8, s[0:1], v11, v6, vcc 1979; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v8 1980; GFX9-NEXT: v_sub_u32_e32 v6, s13, v6 1981; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[0:1] 1982; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v7 1983; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[0:1] 1984; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v8 1985; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v9, vcc 1986; GFX9-NEXT: v_cndmask_b32_e64 v10, v10, v11, s[0:1] 1987; GFX9-NEXT: v_subrev_co_u32_e32 v11, vcc, s10, v7 1988; GFX9-NEXT: v_subbrev_co_u32_e64 v12, s[0:1], 0, v6, vcc 1989; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s11, v12 1990; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[0:1] 1991; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s10, v11 1992; GFX9-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[0:1] 1993; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s11, v12 1994; GFX9-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[0:1] 1995; GFX9-NEXT: v_add_co_u32_e64 v14, s[0:1], 1, v2 1996; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v9, vcc 1997; GFX9-NEXT: v_addc_co_u32_e64 v15, s[0:1], 0, v3, s[0:1] 1998; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, 1, v14 1999; GFX9-NEXT: v_addc_co_u32_e32 v16, vcc, 0, v15, vcc 2000; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 2001; GFX9-NEXT: v_cndmask_b32_e32 v9, v14, v9, vcc 2002; GFX9-NEXT: v_cndmask_b32_e32 v14, v15, v16, vcc 2003; GFX9-NEXT: v_subrev_co_u32_e64 v15, s[0:1], s10, v11 2004; GFX9-NEXT: v_subbrev_co_u32_e64 v6, s[0:1], 0, v6, s[0:1] 2005; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v10 2006; GFX9-NEXT: v_cndmask_b32_e32 v6, v12, v6, vcc 2007; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[0:1] 2008; GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v15, vcc 
2009; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[0:1] 2010; GFX9-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x0 2011; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[0:1] 2012; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[0:1] 2013; GFX9-NEXT: s_xor_b64 s[0:1], s[6:7], s[8:9] 2014; GFX9-NEXT: v_xor_b32_e32 v2, s0, v2 2015; GFX9-NEXT: v_xor_b32_e32 v3, s1, v3 2016; GFX9-NEXT: v_mov_b32_e32 v8, s1 2017; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s0, v2 2018; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v8, vcc 2019; GFX9-NEXT: v_xor_b32_e32 v7, s6, v7 2020; GFX9-NEXT: v_xor_b32_e32 v8, s6, v6 2021; GFX9-NEXT: v_mov_b32_e32 v13, 0 2022; GFX9-NEXT: v_mov_b32_e32 v9, s6 2023; GFX9-NEXT: v_subrev_co_u32_e32 v6, vcc, s6, v7 2024; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v8, v9, vcc 2025; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2026; GFX9-NEXT: global_store_dwordx4 v13, v[0:3], s[12:13] 2027; GFX9-NEXT: global_store_dwordx4 v13, v[4:7], s[14:15] 2028; GFX9-NEXT: s_endpgm 2029; 2030; GFX10-LABEL: sdivrem_v2i64: 2031; GFX10: ; %bb.0: 2032; GFX10-NEXT: s_clause 0x1 2033; GFX10-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x10 2034; GFX10-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x20 2035; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2036; GFX10-NEXT: s_ashr_i32 s2, s17, 31 2037; GFX10-NEXT: s_ashr_i32 s0, s13, 31 2038; GFX10-NEXT: s_add_u32 s8, s16, s2 2039; GFX10-NEXT: s_cselect_b32 s1, 1, 0 2040; GFX10-NEXT: s_and_b32 s1, s1, 1 2041; GFX10-NEXT: s_cmp_lg_u32 s1, 0 2042; GFX10-NEXT: s_addc_u32 s9, s17, s2 2043; GFX10-NEXT: s_add_u32 s6, s12, s0 2044; GFX10-NEXT: s_cselect_b32 s1, 1, 0 2045; GFX10-NEXT: s_and_b32 s3, s1, 1 2046; GFX10-NEXT: s_mov_b32 s1, s0 2047; GFX10-NEXT: s_cmp_lg_u32 s3, 0 2048; GFX10-NEXT: s_mov_b32 s3, s2 2049; GFX10-NEXT: s_addc_u32 s7, s13, s0 2050; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[2:3] 2051; GFX10-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1] 2052; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s7 2053; GFX10-NEXT: s_sub_u32 s20, 0, s6 2054; GFX10-NEXT: s_cselect_b32 s10, 1, 0 2055; GFX10-NEXT: 
v_cvt_f32_u32_e32 v0, s6 2056; GFX10-NEXT: s_and_b32 s10, s10, 1 2057; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 2058; GFX10-NEXT: s_cmp_lg_u32 s10, 0 2059; GFX10-NEXT: s_subb_u32 s21, 0, s7 2060; GFX10-NEXT: s_ashr_i32 s10, s19, 31 2061; GFX10-NEXT: v_add_f32_e32 v0, v1, v0 2062; GFX10-NEXT: s_ashr_i32 s12, s15, 31 2063; GFX10-NEXT: s_xor_b64 s[16:17], s[2:3], s[0:1] 2064; GFX10-NEXT: s_add_u32 s0, s18, s10 2065; GFX10-NEXT: s_cselect_b32 s1, 1, 0 2066; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 2067; GFX10-NEXT: s_and_b32 s1, s1, 1 2068; GFX10-NEXT: s_mov_b32 s13, s12 2069; GFX10-NEXT: s_cmp_lg_u32 s1, 0 2070; GFX10-NEXT: s_mov_b32 s11, s10 2071; GFX10-NEXT: s_addc_u32 s1, s19, s10 2072; GFX10-NEXT: s_add_u32 s14, s14, s12 2073; GFX10-NEXT: s_cselect_b32 s3, 1, 0 2074; GFX10-NEXT: s_and_b32 s3, s3, 1 2075; GFX10-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 2076; GFX10-NEXT: s_cmp_lg_u32 s3, 0 2077; GFX10-NEXT: s_addc_u32 s15, s15, s12 2078; GFX10-NEXT: s_xor_b64 s[18:19], s[0:1], s[10:11] 2079; GFX10-NEXT: s_xor_b64 s[14:15], s[14:15], s[12:13] 2080; GFX10-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 2081; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s15 2082; GFX10-NEXT: v_cvt_f32_u32_e32 v3, s14 2083; GFX10-NEXT: s_sub_u32 s3, 0, s14 2084; GFX10-NEXT: s_cselect_b32 s0, 1, 0 2085; GFX10-NEXT: v_trunc_f32_e32 v2, v2 2086; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 2087; GFX10-NEXT: s_and_b32 s0, s0, 1 2088; GFX10-NEXT: s_cmp_lg_u32 s0, 0 2089; GFX10-NEXT: v_add_f32_e32 v1, v1, v3 2090; GFX10-NEXT: v_mul_f32_e32 v3, 0xcf800000, v2 2091; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2 2092; GFX10-NEXT: s_subb_u32 s22, 0, s15 2093; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 2094; GFX10-NEXT: v_add_f32_e32 v0, v3, v0 2095; GFX10-NEXT: v_mul_lo_u32 v3, s20, v2 2096; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 2097; GFX10-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 2098; GFX10-NEXT: v_mul_lo_u32 v4, s21, v0 2099; GFX10-NEXT: v_mul_hi_u32 v5, s20, v0 2100; GFX10-NEXT: v_mul_lo_u32 v6, s20, v0 2101; GFX10-NEXT: 
v_mul_f32_e32 v7, 0x2f800000, v1 2102; GFX10-NEXT: v_add3_u32 v3, v4, v3, v5 2103; GFX10-NEXT: v_trunc_f32_e32 v4, v7 2104; GFX10-NEXT: v_mul_lo_u32 v5, v2, v6 2105; GFX10-NEXT: v_mul_hi_u32 v7, v0, v6 2106; GFX10-NEXT: v_mul_hi_u32 v6, v2, v6 2107; GFX10-NEXT: v_mul_lo_u32 v8, v0, v3 2108; GFX10-NEXT: v_mul_lo_u32 v10, v2, v3 2109; GFX10-NEXT: v_mul_f32_e32 v9, 0xcf800000, v4 2110; GFX10-NEXT: v_mul_hi_u32 v11, v0, v3 2111; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v4 2112; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3 2113; GFX10-NEXT: v_add_f32_e32 v1, v9, v1 2114; GFX10-NEXT: v_add_co_u32 v5, s0, v5, v8 2115; GFX10-NEXT: v_mul_lo_u32 v9, s3, v4 2116; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s0 2117; GFX10-NEXT: v_add_co_u32 v6, s0, v10, v6 2118; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 2119; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s0 2120; GFX10-NEXT: v_add_co_u32 v5, s0, v5, v7 2121; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 2122; GFX10-NEXT: v_mul_lo_u32 v12, s22, v1 2123; GFX10-NEXT: v_mul_hi_u32 v13, s3, v1 2124; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v11 2125; GFX10-NEXT: v_mul_lo_u32 v11, s3, v1 2126; GFX10-NEXT: v_add_nc_u32_e32 v5, v8, v5 2127; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 2128; GFX10-NEXT: v_add_co_u32 v5, s0, v6, v5 2129; GFX10-NEXT: v_add_nc_u32_e32 v7, v10, v7 2130; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 2131; GFX10-NEXT: v_add3_u32 v8, v12, v9, v13 2132; GFX10-NEXT: v_mul_lo_u32 v9, v4, v11 2133; GFX10-NEXT: v_mul_hi_u32 v10, v1, v11 2134; GFX10-NEXT: v_mul_hi_u32 v11, v4, v11 2135; GFX10-NEXT: v_add3_u32 v3, v7, v6, v3 2136; GFX10-NEXT: v_mul_lo_u32 v6, v1, v8 2137; GFX10-NEXT: v_mul_lo_u32 v7, v4, v8 2138; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v5 2139; GFX10-NEXT: v_mul_hi_u32 v5, v1, v8 2140; GFX10-NEXT: v_add_co_ci_u32_e64 v12, s0, v2, v3, vcc_lo 2141; GFX10-NEXT: v_mul_hi_u32 v8, v4, v8 2142; GFX10-NEXT: v_add_nc_u32_e32 v2, v2, v3 2143; GFX10-NEXT: v_mul_lo_u32 v14, s21, v0 2144; GFX10-NEXT: v_add_co_u32 v6, s0, v9, v6 2145; GFX10-NEXT: 
v_mul_hi_u32 v15, s20, v0 2146; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s0 2147; GFX10-NEXT: v_add_co_u32 v7, s0, v7, v11 2148; GFX10-NEXT: v_mul_lo_u32 v16, s20, v12 2149; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s0 2150; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v10 2151; GFX10-NEXT: v_mul_lo_u32 v13, s20, v0 2152; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 2153; GFX10-NEXT: v_add_co_u32 v5, s0, v7, v5 2154; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 2155; GFX10-NEXT: v_add3_u32 v14, v14, v16, v15 2156; GFX10-NEXT: v_add_nc_u32_e32 v6, v9, v6 2157; GFX10-NEXT: v_mul_lo_u32 v10, v12, v13 2158; GFX10-NEXT: v_add_nc_u32_e32 v7, v11, v7 2159; GFX10-NEXT: v_mul_lo_u32 v11, v0, v14 2160; GFX10-NEXT: v_add_co_u32 v5, s0, v5, v6 2161; GFX10-NEXT: v_mul_hi_u32 v9, v0, v13 2162; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 2163; GFX10-NEXT: v_mul_hi_u32 v13, v12, v13 2164; GFX10-NEXT: v_mul_lo_u32 v15, v12, v14 2165; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v5 2166; GFX10-NEXT: v_mul_hi_u32 v16, v0, v14 2167; GFX10-NEXT: v_add3_u32 v6, v7, v6, v8 2168; GFX10-NEXT: v_add_co_u32 v5, s1, v10, v11 2169; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s1 2170; GFX10-NEXT: v_mul_lo_u32 v3, s3, v1 2171; GFX10-NEXT: v_add_co_u32 v8, s1, v15, v13 2172; GFX10-NEXT: v_mul_lo_u32 v13, s22, v1 2173; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s1 2174; GFX10-NEXT: v_add_co_ci_u32_e64 v11, s1, v4, v6, s0 2175; GFX10-NEXT: v_add_co_u32 v5, s1, v5, v9 2176; GFX10-NEXT: v_mul_hi_u32 v15, s3, v1 2177; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s1 2178; GFX10-NEXT: v_add_co_u32 v8, s1, v8, v16 2179; GFX10-NEXT: v_mul_lo_u32 v9, s3, v11 2180; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, 1, s1 2181; GFX10-NEXT: v_add_nc_u32_e32 v4, v4, v6 2182; GFX10-NEXT: v_add_nc_u32_e32 v5, v7, v5 2183; GFX10-NEXT: v_mul_hi_u32 v7, v12, v14 2184; GFX10-NEXT: v_mul_lo_u32 v12, v11, v3 2185; GFX10-NEXT: v_add_nc_u32_e32 v10, v10, v16 2186; GFX10-NEXT: s_load_dwordx4 s[20:23], s[4:5], 0x0 2187; GFX10-NEXT: v_add_co_u32 v5, s1, v8, v5 2188; 
GFX10-NEXT: v_add3_u32 v9, v13, v9, v15 2189; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s1 2190; GFX10-NEXT: v_mul_hi_u32 v13, v1, v3 2191; GFX10-NEXT: v_mul_hi_u32 v3, v11, v3 2192; GFX10-NEXT: v_mul_lo_u32 v14, v1, v9 2193; GFX10-NEXT: v_add3_u32 v7, v10, v8, v7 2194; GFX10-NEXT: v_mul_lo_u32 v8, v11, v9 2195; GFX10-NEXT: v_mul_hi_u32 v10, v1, v9 2196; GFX10-NEXT: v_mul_hi_u32 v9, v11, v9 2197; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v7, vcc_lo 2198; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v5 2199; GFX10-NEXT: v_add_co_u32 v7, s1, v12, v14 2200; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s1 2201; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo 2202; GFX10-NEXT: v_add_co_u32 v3, s1, v8, v3 2203; GFX10-NEXT: v_mul_lo_u32 v8, s9, v0 2204; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s1 2205; GFX10-NEXT: v_add_co_u32 v7, s1, v7, v13 2206; GFX10-NEXT: v_mul_lo_u32 v14, s8, v2 2207; GFX10-NEXT: v_mul_hi_u32 v12, s8, v0 2208; GFX10-NEXT: v_mul_hi_u32 v0, s9, v0 2209; GFX10-NEXT: v_mul_lo_u32 v13, s9, v2 2210; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s1 2211; GFX10-NEXT: v_add_co_u32 v3, s1, v3, v10 2212; GFX10-NEXT: v_mul_hi_u32 v15, s8, v2 2213; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s1 2214; GFX10-NEXT: v_add_co_u32 v8, s1, v8, v14 2215; GFX10-NEXT: v_add_nc_u32_e32 v7, v11, v7 2216; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s1 2217; GFX10-NEXT: v_add_co_u32 v0, s1, v13, v0 2218; GFX10-NEXT: v_mul_hi_u32 v2, s9, v2 2219; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s1 2220; GFX10-NEXT: v_add_co_u32 v8, s1, v8, v12 2221; GFX10-NEXT: v_add_nc_u32_e32 v5, v5, v10 2222; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s1 2223; GFX10-NEXT: v_add_co_u32 v0, s1, v0, v15 2224; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s1 2225; GFX10-NEXT: v_add_nc_u32_e32 v8, v14, v8 2226; GFX10-NEXT: v_add_nc_u32_e32 v10, v13, v12 2227; GFX10-NEXT: v_add_co_u32 v0, s1, v0, v8 2228; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s1 2229; GFX10-NEXT: v_add_co_u32 v3, s1, v3, v7 2230; GFX10-NEXT: 
v_cndmask_b32_e64 v7, 0, 1, s1 2231; GFX10-NEXT: v_mul_lo_u32 v6, s7, v0 2232; GFX10-NEXT: v_add3_u32 v2, v10, v8, v2 2233; GFX10-NEXT: v_add3_u32 v5, v5, v7, v9 2234; GFX10-NEXT: v_mul_hi_u32 v7, s6, v0 2235; GFX10-NEXT: v_mul_lo_u32 v8, s6, v2 2236; GFX10-NEXT: v_mov_b32_e32 v9, 0 2237; GFX10-NEXT: v_add_co_ci_u32_e64 v4, vcc_lo, v4, v5, s0 2238; GFX10-NEXT: v_mul_lo_u32 v5, s6, v0 2239; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v1, v3 2240; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v4, vcc_lo 2241; GFX10-NEXT: v_add3_u32 v4, v6, v8, v7 2242; GFX10-NEXT: v_mul_lo_u32 v6, s19, v1 2243; GFX10-NEXT: v_mul_hi_u32 v7, s19, v1 2244; GFX10-NEXT: v_sub_co_u32 v5, vcc_lo, s8, v5 2245; GFX10-NEXT: v_mul_lo_u32 v14, s18, v3 2246; GFX10-NEXT: v_sub_nc_u32_e32 v8, s9, v4 2247; GFX10-NEXT: v_sub_co_ci_u32_e64 v4, s0, s9, v4, vcc_lo 2248; GFX10-NEXT: v_mul_lo_u32 v15, s19, v3 2249; GFX10-NEXT: v_cmp_le_u32_e64 s0, s6, v5 2250; GFX10-NEXT: v_mul_hi_u32 v1, s18, v1 2251; GFX10-NEXT: v_subrev_co_ci_u32_e32 v8, vcc_lo, s7, v8, vcc_lo 2252; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v4 2253; GFX10-NEXT: v_mul_hi_u32 v17, s18, v3 2254; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, -1, s0 2255; GFX10-NEXT: v_mul_hi_u32 v3, s19, v3 2256; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc_lo 2257; GFX10-NEXT: v_sub_co_u32 v12, vcc_lo, v5, s6 2258; GFX10-NEXT: v_subrev_co_ci_u32_e64 v13, s0, 0, v8, vcc_lo 2259; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v4 2260; GFX10-NEXT: v_subrev_co_ci_u32_e32 v8, vcc_lo, s7, v8, vcc_lo 2261; GFX10-NEXT: v_cndmask_b32_e64 v10, v11, v10, s0 2262; GFX10-NEXT: v_cmp_le_u32_e64 s0, s7, v13 2263; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 2264; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, s0 2265; GFX10-NEXT: v_cmp_le_u32_e64 s0, s6, v12 2266; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, -1, s0 2267; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v14 2268; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s0 2269; GFX10-NEXT: v_add_co_u32 v7, s0, v15, v7 2270; GFX10-NEXT: v_add_co_u32 v1, s1, v6, v1 
2271; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s0 2272; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s1 2273; GFX10-NEXT: v_add_co_u32 v7, s0, v7, v17 2274; GFX10-NEXT: v_cndmask_b32_e64 v15, 0, 1, s0 2275; GFX10-NEXT: v_add_co_u32 v17, s0, v0, 1 2276; GFX10-NEXT: v_add_nc_u32_e32 v1, v14, v1 2277; GFX10-NEXT: v_add_co_ci_u32_e64 v18, s0, 0, v2, s0 2278; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s7, v13 2279; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v15 2280; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v16, s0 2281; GFX10-NEXT: v_add_co_u32 v7, s0, v7, v1 2282; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 2283; GFX10-NEXT: v_add_co_u32 v14, s0, v17, 1 2284; GFX10-NEXT: v_add_co_ci_u32_e64 v15, s0, 0, v18, s0 2285; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v11 2286; GFX10-NEXT: v_add3_u32 v3, v6, v1, v3 2287; GFX10-NEXT: v_mul_lo_u32 v10, s15, v7 2288; GFX10-NEXT: v_mul_lo_u32 v16, s14, v7 2289; GFX10-NEXT: v_cndmask_b32_e64 v6, v18, v15, s0 2290; GFX10-NEXT: v_mul_lo_u32 v11, s14, v3 2291; GFX10-NEXT: v_mul_hi_u32 v15, s14, v7 2292; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v14, s0 2293; GFX10-NEXT: v_sub_co_u32 v14, s1, v12, s6 2294; GFX10-NEXT: v_subrev_co_ci_u32_e64 v8, s1, 0, v8, s1 2295; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 2296; GFX10-NEXT: v_cndmask_b32_e32 v1, v2, v6, vcc_lo 2297; GFX10-NEXT: v_add3_u32 v6, v10, v11, v15 2298; GFX10-NEXT: v_cndmask_b32_e64 v2, v13, v8, s0 2299; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v14, s0 2300; GFX10-NEXT: v_sub_co_u32 v8, s0, s18, v16 2301; GFX10-NEXT: v_xor_b32_e32 v0, s16, v0 2302; GFX10-NEXT: v_sub_co_ci_u32_e64 v10, s1, s19, v6, s0 2303; GFX10-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc_lo 2304; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v12, vcc_lo 2305; GFX10-NEXT: v_sub_nc_u32_e32 v4, s19, v6 2306; GFX10-NEXT: v_xor_b32_e32 v1, s17, v1 2307; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s15, v10 2308; GFX10-NEXT: v_xor_b32_e32 v2, s2, v2 2309; GFX10-NEXT: v_xor_b32_e32 v5, s2, v5 2310; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc_lo 2311; GFX10-NEXT: 
v_subrev_co_ci_u32_e64 v4, vcc_lo, s15, v4, s0 2312; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s14, v8 2313; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc_lo 2314; GFX10-NEXT: v_sub_co_u32 v12, vcc_lo, v8, s14 2315; GFX10-NEXT: v_subrev_co_ci_u32_e64 v13, s0, 0, v4, vcc_lo 2316; GFX10-NEXT: v_sub_co_u32 v0, s0, v0, s16 2317; GFX10-NEXT: v_subrev_co_ci_u32_e32 v4, vcc_lo, s15, v4, vcc_lo 2318; GFX10-NEXT: v_subrev_co_ci_u32_e64 v1, s0, s17, v1, s0 2319; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s15, v10 2320; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v11, s0 2321; GFX10-NEXT: v_cmp_le_u32_e64 s0, s15, v13 2322; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, -1, s0 2323; GFX10-NEXT: v_cmp_le_u32_e64 s0, s14, v12 2324; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, -1, s0 2325; GFX10-NEXT: v_add_co_u32 v15, s0, v7, 1 2326; GFX10-NEXT: v_add_co_ci_u32_e64 v16, s0, 0, v3, s0 2327; GFX10-NEXT: v_cmp_eq_u32_e64 s0, s15, v13 2328; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v14, s0 2329; GFX10-NEXT: v_add_co_u32 v14, s0, v15, 1 2330; GFX10-NEXT: v_add_co_ci_u32_e64 v17, s0, 0, v16, s0 2331; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v11 2332; GFX10-NEXT: v_sub_co_u32 v11, s0, v12, s14 2333; GFX10-NEXT: v_subrev_co_ci_u32_e64 v4, s0, 0, v4, s0 2334; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v6 2335; GFX10-NEXT: v_cndmask_b32_e32 v14, v15, v14, vcc_lo 2336; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v17, vcc_lo 2337; GFX10-NEXT: v_cndmask_b32_e32 v6, v12, v11, vcc_lo 2338; GFX10-NEXT: v_cndmask_b32_e32 v4, v13, v4, vcc_lo 2339; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v14, s0 2340; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v15, s0 2341; GFX10-NEXT: v_cndmask_b32_e64 v6, v8, v6, s0 2342; GFX10-NEXT: v_cndmask_b32_e64 v8, v10, v4, s0 2343; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v5, s2 2344; GFX10-NEXT: s_xor_b64 s[0:1], s[10:11], s[12:13] 2345; GFX10-NEXT: v_subrev_co_ci_u32_e32 v5, vcc_lo, s2, v2, vcc_lo 2346; GFX10-NEXT: v_xor_b32_e32 v2, s0, v7 2347; GFX10-NEXT: v_xor_b32_e32 v3, s1, v3 2348; GFX10-NEXT: v_xor_b32_e32 v6, s10, v6 
; --- @sdiv_i8 ----------------------------------------------------------------
; This span first finishes the GFX10 checks and the IR body of the preceding
; <2 x i64> kernel, then opens @sdiv_i8 with its GFX8 and GFX9 checks.
; @sdiv_i8 takes two i8 addrspace(1) output pointers (%out0, %out1) and two i8
; operands (%x, %y). Its IR body, which follows its GFX10 checks further down,
; stores `sdiv i8 %x, %y` through %out0 and `srem i8 %x, %y` through %out1, so
; despite the "sdiv" name it exercises quotient and remainder together.
; In the expected output both operands come from a single dword load at offset
; 0x10 and pass through s_bfe_i32 / s_sext_i32_i8 (presumably sign-extending
; each byte to 32 bits) before a 32-bit sequence built around v_rcp_iflag_f32
; with two compare/select correction steps. Results are written with
; flat_store_byte on GFX8 and global_store_byte on GFX9 and GFX10.
; The check lines are generated (see the note on the first line of the file);
; rerun utils/update_llc_test_checks.py instead of editing them by hand.
2349; GFX10-NEXT: v_xor_b32_e32 v7, s10, v8 2350; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v2, s0 2351; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v3, vcc_lo 2352; GFX10-NEXT: v_sub_co_u32 v6, vcc_lo, v6, s10 2353; GFX10-NEXT: v_subrev_co_ci_u32_e32 v7, vcc_lo, s10, v7, vcc_lo 2354; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2355; GFX10-NEXT: global_store_dwordx4 v9, v[0:3], s[20:21] 2356; GFX10-NEXT: global_store_dwordx4 v9, v[4:7], s[22:23] 2357; GFX10-NEXT: s_endpgm 2358 %div = sdiv <2 x i64> %x, %y 2359 store <2 x i64> %div, <2 x i64> addrspace(1)* %out0 2360 %rem = srem <2 x i64> %x, %y 2361 store <2 x i64> %rem, <2 x i64> addrspace(1)* %out1 2362 ret void 2363} 2364 2365define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out1, i8 %x, i8 %y) { 2366; GFX8-LABEL: sdiv_i8: 2367; GFX8: ; %bb.0: 2368; GFX8-NEXT: s_load_dword s0, s[4:5], 0x10 2369; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2370; GFX8-NEXT: s_bfe_i32 s1, s0, 0x80008 2371; GFX8-NEXT: s_ashr_i32 s6, s1, 31 2372; GFX8-NEXT: s_add_i32 s1, s1, s6 2373; GFX8-NEXT: s_xor_b32 s7, s1, s6 2374; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s7 2375; GFX8-NEXT: s_sub_i32 s1, 0, s7 2376; GFX8-NEXT: s_sext_i32_i8 s0, s0 2377; GFX8-NEXT: s_ashr_i32 s8, s0, 31 2378; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 2379; GFX8-NEXT: s_add_i32 s0, s0, s8 2380; GFX8-NEXT: s_xor_b32 s9, s0, s8 2381; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2382; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 2383; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0 2384; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2385; GFX8-NEXT: s_xor_b32 s4, s8, s6 2386; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 2387; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 2388; GFX8-NEXT: v_mul_hi_u32 v2, s9, v0 2389; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2390; GFX8-NEXT: v_mov_b32_e32 v0, s0 2391; GFX8-NEXT: v_mov_b32_e32 v1, s1 2392; GFX8-NEXT: v_mul_lo_u32 v3, v2, s7 2393; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 2394; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s9, v3 2395; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 2396; 
GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2397; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3 2398; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2399; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 2400; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 2401; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2402; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3 2403; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2 2404; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2405; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s4, v2 2406; GFX8-NEXT: v_xor_b32_e32 v3, s8, v3 2407; GFX8-NEXT: flat_store_byte v[0:1], v2 2408; GFX8-NEXT: v_mov_b32_e32 v0, s2 2409; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s8, v3 2410; GFX8-NEXT: v_mov_b32_e32 v1, s3 2411; GFX8-NEXT: flat_store_byte v[0:1], v3 2412; GFX8-NEXT: s_endpgm 2413; 2414; GFX9-LABEL: sdiv_i8: 2415; GFX9: ; %bb.0: 2416; GFX9-NEXT: s_load_dword s0, s[4:5], 0x10 2417; GFX9-NEXT: v_mov_b32_e32 v2, 0 2418; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2419; GFX9-NEXT: s_bfe_i32 s1, s0, 0x80008 2420; GFX9-NEXT: s_ashr_i32 s6, s1, 31 2421; GFX9-NEXT: s_add_i32 s1, s1, s6 2422; GFX9-NEXT: s_xor_b32 s7, s1, s6 2423; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7 2424; GFX9-NEXT: s_sub_i32 s1, 0, s7 2425; GFX9-NEXT: s_sext_i32_i8 s0, s0 2426; GFX9-NEXT: s_ashr_i32 s8, s0, 31 2427; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 2428; GFX9-NEXT: s_add_i32 s0, s0, s8 2429; GFX9-NEXT: s_xor_b32 s9, s0, s8 2430; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2431; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 2432; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0 2433; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2434; GFX9-NEXT: s_xor_b32 s4, s8, s6 2435; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 2436; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 2437; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 2438; GFX9-NEXT: v_mul_lo_u32 v1, v0, s7 2439; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 2440; GFX9-NEXT: v_sub_u32_e32 v1, s9, v1 2441; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 2442; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2443; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 2444; 
GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2445; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 2446; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 2447; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2448; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 2449; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2450; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 2451; GFX9-NEXT: v_xor_b32_e32 v1, s8, v1 2452; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0 2453; GFX9-NEXT: v_subrev_u32_e32 v1, s8, v1 2454; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2455; GFX9-NEXT: global_store_byte v2, v0, s[0:1] 2456; GFX9-NEXT: global_store_byte v2, v1, s[2:3] 2457; GFX9-NEXT: s_endpgm 2458; 2459; GFX10-LABEL: sdiv_i8: 2460; GFX10: ; %bb.0: 2461; GFX10-NEXT: s_load_dword s0, s[4:5], 0x10 2462; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2463; GFX10-NEXT: s_bfe_i32 s1, s0, 0x80008 2464; GFX10-NEXT: s_sext_i32_i8 s0, s0 2465; GFX10-NEXT: s_ashr_i32 s6, s1, 31 2466; GFX10-NEXT: s_ashr_i32 s8, s0, 31 2467; GFX10-NEXT: s_add_i32 s1, s1, s6 2468; GFX10-NEXT: s_add_i32 s0, s0, s8 2469; GFX10-NEXT: s_xor_b32 s7, s1, s6 2470; GFX10-NEXT: s_xor_b32 s0, s0, s8 2471; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s7 2472; GFX10-NEXT: s_sub_i32 s1, 0, s7 2473; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 2474; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2475; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 2476; GFX10-NEXT: v_mul_lo_u32 v1, s1, v0 2477; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 2478; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 2479; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 2480; GFX10-NEXT: v_mul_lo_u32 v1, v0, s7 2481; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 2482; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 2483; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2484; GFX10-NEXT: s_xor_b32 s4, s8, s6 2485; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 2486; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 2487; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2488; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2489; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 2490; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 
; --- @sdivrem_v2i8 -----------------------------------------------------------
; This span first finishes the preceding i8 kernel (end of its GFX10 checks
; and its sdiv/srem IR body), then opens @sdivrem_v2i8.
; @sdivrem_v2i8 takes two <2 x i8> addrspace(1) output pointers (%out0, %out1)
; and two <2 x i8> operands (%x, %y). Its IR body, which follows its GFX10
; checks, stores `sdiv <2 x i8> %x, %y` through %out0 and
; `srem <2 x i8> %x, %y` through %out1.
; In the expected output all four bytes come from one dword load at offset 0x10
; and are pulled apart with s_sext_i32_i8 and s_bfe_i32 (immediates 0x80008,
; 0x80010, 0x80018). Each element then gets its own v_rcp_iflag_f32-based
; 32-bit sequence. The two result bytes are recombined with a 0xff mask plus
; v_lshlrev_b16 / v_or_b32_sdwa on GFX8 and GFX9, and with v_and_b32_sdwa
; (dst_sel BYTE_1) / v_or_b32_sdwa on GFX10, then written with flat_store_short
; (GFX8) or global_store_short (GFX9, GFX10).
; The check lines are generated (see the note on the first line of the file);
; rerun utils/update_llc_test_checks.py instead of editing them by hand.
2491; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 2492; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2493; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2494; GFX10-NEXT: v_mov_b32_e32 v2, 0 2495; GFX10-NEXT: v_xor_b32_e32 v0, s4, v0 2496; GFX10-NEXT: v_xor_b32_e32 v1, s8, v1 2497; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s4, v0 2498; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s8, v1 2499; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2500; GFX10-NEXT: global_store_byte v2, v0, s[0:1] 2501; GFX10-NEXT: global_store_byte v2, v1, s[2:3] 2502; GFX10-NEXT: s_endpgm 2503 %div = sdiv i8 %x, %y 2504 store i8 %div, i8 addrspace(1)* %out0 2505 %rem = srem i8 %x, %y 2506 store i8 %rem, i8 addrspace(1)* %out1 2507 ret void 2508} 2509 2510define amdgpu_kernel void @sdivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> addrspace(1)* %out1, <2 x i8> %x, <2 x i8> %y) { 2511; GFX8-LABEL: sdivrem_v2i8: 2512; GFX8: ; %bb.0: 2513; GFX8-NEXT: s_load_dword s2, s[4:5], 0x10 2514; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2515; GFX8-NEXT: s_bfe_i32 s0, s2, 0x80010 2516; GFX8-NEXT: s_ashr_i32 s3, s0, 31 2517; GFX8-NEXT: s_add_i32 s0, s0, s3 2518; GFX8-NEXT: s_xor_b32 s8, s0, s3 2519; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s8 2520; GFX8-NEXT: s_sub_i32 s6, 0, s8 2521; GFX8-NEXT: s_bfe_i32 s1, s2, 0x80018 2522; GFX8-NEXT: s_ashr_i32 s10, s1, 31 2523; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 2524; GFX8-NEXT: s_add_i32 s1, s1, s10 2525; GFX8-NEXT: s_xor_b32 s11, s1, s10 2526; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s11 2527; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2528; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 2529; GFX8-NEXT: s_sext_i32_i8 s0, s2 2530; GFX8-NEXT: s_ashr_i32 s9, s0, 31 2531; GFX8-NEXT: s_add_i32 s0, s0, s9 2532; GFX8-NEXT: v_mul_lo_u32 v1, s6, v0 2533; GFX8-NEXT: s_xor_b32 s0, s0, s9 2534; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 2535; GFX8-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 2536; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 2537; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 2538; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 2539; GFX8-NEXT: 
v_mul_f32_e32 v1, 0x4f7ffffe, v2 2540; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 2541; GFX8-NEXT: v_mul_lo_u32 v2, v0, s8 2542; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 2543; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s0, v2 2544; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 2545; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2546; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s8, v2 2547; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2548; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 2549; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 2550; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2551; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s8, v2 2552; GFX8-NEXT: s_sub_i32 s1, 0, s11 2553; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2554; GFX8-NEXT: v_mul_lo_u32 v3, s1, v1 2555; GFX8-NEXT: s_bfe_i32 s1, s2, 0x80008 2556; GFX8-NEXT: s_ashr_i32 s2, s1, 31 2557; GFX8-NEXT: s_add_i32 s1, s1, s2 2558; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 2559; GFX8-NEXT: s_xor_b32 s1, s1, s2 2560; GFX8-NEXT: s_xor_b32 s0, s9, s3 2561; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 2562; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 2563; GFX8-NEXT: v_mul_hi_u32 v1, s1, v1 2564; GFX8-NEXT: v_xor_b32_e32 v2, s9, v2 2565; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 2566; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s9, v2 2567; GFX8-NEXT: v_mul_lo_u32 v3, v1, s11 2568; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 2569; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s1, v3 2570; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 2571; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2572; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s11, v3 2573; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2574; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 2575; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s11, v3 2576; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2577; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s11, v3 2578; GFX8-NEXT: s_xor_b32 s0, s2, s10 2579; GFX8-NEXT: v_xor_b32_e32 v1, s0, v1 2580; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2581; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, s0, v1 2582; GFX8-NEXT: s_movk_i32 s0, 0xff 
2583; GFX8-NEXT: v_and_b32_e32 v1, s0, v1 2584; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 2585; GFX8-NEXT: v_xor_b32_e32 v3, s2, v3 2586; GFX8-NEXT: v_or_b32_sdwa v4, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2587; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2588; GFX8-NEXT: v_mov_b32_e32 v0, s4 2589; GFX8-NEXT: v_mov_b32_e32 v1, s5 2590; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s2, v3 2591; GFX8-NEXT: flat_store_short v[0:1], v4 2592; GFX8-NEXT: v_and_b32_e32 v0, s0, v3 2593; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0 2594; GFX8-NEXT: v_or_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2595; GFX8-NEXT: v_mov_b32_e32 v0, s6 2596; GFX8-NEXT: v_mov_b32_e32 v1, s7 2597; GFX8-NEXT: flat_store_short v[0:1], v2 2598; GFX8-NEXT: s_endpgm 2599; 2600; GFX9-LABEL: sdivrem_v2i8: 2601; GFX9: ; %bb.0: 2602; GFX9-NEXT: s_load_dword s6, s[4:5], 0x10 2603; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2604; GFX9-NEXT: s_bfe_i32 s0, s6, 0x80010 2605; GFX9-NEXT: s_ashr_i32 s7, s0, 31 2606; GFX9-NEXT: s_add_i32 s0, s0, s7 2607; GFX9-NEXT: s_xor_b32 s8, s0, s7 2608; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s8 2609; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2610; GFX9-NEXT: s_bfe_i32 s5, s6, 0x80018 2611; GFX9-NEXT: s_ashr_i32 s9, s5, 31 2612; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 2613; GFX9-NEXT: s_add_i32 s5, s5, s9 2614; GFX9-NEXT: s_xor_b32 s5, s5, s9 2615; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s5 2616; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2617; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 2618; GFX9-NEXT: s_sub_i32 s10, 0, s8 2619; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 2620; GFX9-NEXT: s_sext_i32_i8 s4, s6 2621; GFX9-NEXT: v_mul_lo_u32 v2, s10, v0 2622; GFX9-NEXT: s_ashr_i32 s10, s4, 31 2623; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 2624; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 2625; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 2626; GFX9-NEXT: s_add_i32 s4, s4, s10 2627; GFX9-NEXT: s_xor_b32 s4, s4, s10 2628; GFX9-NEXT: s_sub_i32 s11, 0, s5 2629; GFX9-NEXT: 
v_add_u32_e32 v0, v0, v2 2630; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 2631; GFX9-NEXT: v_mul_lo_u32 v2, s11, v1 2632; GFX9-NEXT: s_bfe_i32 s6, s6, 0x80008 2633; GFX9-NEXT: s_ashr_i32 s11, s6, 31 2634; GFX9-NEXT: v_mul_lo_u32 v3, v0, s8 2635; GFX9-NEXT: v_mul_hi_u32 v2, v1, v2 2636; GFX9-NEXT: s_add_i32 s6, s6, s11 2637; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 2638; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 2639; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 2640; GFX9-NEXT: s_xor_b32 s4, s6, s11 2641; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 2642; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2643; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v3 2644; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1 2645; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2646; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 2647; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 2648; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 2649; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v3 2650; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc 2651; GFX9-NEXT: v_mul_lo_u32 v3, v1, s5 2652; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 2653; GFX9-NEXT: s_xor_b32 s6, s10, s7 2654; GFX9-NEXT: v_xor_b32_e32 v0, s6, v0 2655; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 2656; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 2657; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2658; GFX9-NEXT: v_subrev_u32_e32 v4, s5, v3 2659; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2660; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 2661; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v3 2662; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2663; GFX9-NEXT: s_xor_b32 s4, s11, s9 2664; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 2665; GFX9-NEXT: v_subrev_u32_e32 v4, s5, v3 2666; GFX9-NEXT: v_subrev_u32_e32 v1, s4, v1 2667; GFX9-NEXT: s_movk_i32 s4, 0xff 2668; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2669; GFX9-NEXT: v_and_b32_e32 v1, s4, v1 2670; GFX9-NEXT: v_xor_b32_e32 v3, s11, v3 2671; GFX9-NEXT: v_subrev_u32_e32 v0, s6, v0 2672; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 2673; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:BYTE_0 src1_sel:DWORD 2674; GFX9-NEXT: v_mov_b32_e32 v1, 0 2675; GFX9-NEXT: v_subrev_u32_e32 v3, s11, v3 2676; GFX9-NEXT: v_xor_b32_e32 v2, s10, v2 2677; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2678; GFX9-NEXT: global_store_short v1, v0, s[0:1] 2679; GFX9-NEXT: v_and_b32_e32 v0, s4, v3 2680; GFX9-NEXT: v_subrev_u32_e32 v2, s10, v2 2681; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v0 2682; GFX9-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2683; GFX9-NEXT: global_store_short v1, v0, s[2:3] 2684; GFX9-NEXT: s_endpgm 2685; 2686; GFX10-LABEL: sdivrem_v2i8: 2687; GFX10: ; %bb.0: 2688; GFX10-NEXT: s_load_dword s0, s[4:5], 0x10 2689; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2690; GFX10-NEXT: s_bfe_i32 s1, s0, 0x80018 2691; GFX10-NEXT: s_bfe_i32 s2, s0, 0x80010 2692; GFX10-NEXT: s_ashr_i32 s3, s1, 31 2693; GFX10-NEXT: s_ashr_i32 s8, s2, 31 2694; GFX10-NEXT: s_add_i32 s1, s1, s3 2695; GFX10-NEXT: s_add_i32 s2, s2, s8 2696; GFX10-NEXT: s_xor_b32 s1, s1, s3 2697; GFX10-NEXT: s_xor_b32 s2, s2, s8 2698; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s1 2699; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s2 2700; GFX10-NEXT: s_sub_i32 s6, 0, s1 2701; GFX10-NEXT: s_sub_i32 s7, 0, s2 2702; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 2703; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 2704; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2705; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 2706; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 2707; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 2708; GFX10-NEXT: v_mul_lo_u32 v2, s6, v0 2709; GFX10-NEXT: v_mul_lo_u32 v3, s7, v1 2710; GFX10-NEXT: s_sext_i32_i8 s6, s0 2711; GFX10-NEXT: s_bfe_i32 s0, s0, 0x80008 2712; GFX10-NEXT: s_ashr_i32 s9, s6, 31 2713; GFX10-NEXT: s_ashr_i32 s10, s0, 31 2714; GFX10-NEXT: s_add_i32 s6, s6, s9 2715; GFX10-NEXT: s_add_i32 s0, s0, s10 2716; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 2717; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 2718; GFX10-NEXT: s_xor_b32 s0, s0, s10 2719; GFX10-NEXT: s_xor_b32 s6, s6, s9 2720; GFX10-NEXT: 
v_add_nc_u32_e32 v0, v0, v2 2721; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 2722; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 2723; GFX10-NEXT: v_mul_hi_u32 v1, s6, v1 2724; GFX10-NEXT: v_mul_lo_u32 v2, v0, s1 2725; GFX10-NEXT: v_mul_lo_u32 v3, v1, s2 2726; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v0 2727; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v1 2728; GFX10-NEXT: v_sub_nc_u32_e32 v2, s0, v2 2729; GFX10-NEXT: v_sub_nc_u32_e32 v3, s6, v3 2730; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 2731; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v2 2732; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s1, v2 2733; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v3 2734; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s2, v3 2735; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 2736; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo 2737; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v4, s0 2738; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s0 2739; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v0 2740; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v2 2741; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s1, v2 2742; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v1 2743; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v3 2744; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s2, v3 2745; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo 2746; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo 2747; GFX10-NEXT: s_xor_b32 s1, s10, s3 2748; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v4, s0 2749; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s0 2750; GFX10-NEXT: v_xor_b32_e32 v0, s1, v0 2751; GFX10-NEXT: v_xor_b32_e32 v2, s10, v2 2752; GFX10-NEXT: s_xor_b32 s0, s9, s8 2753; GFX10-NEXT: v_xor_b32_e32 v1, s0, v1 2754; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s1, v0 2755; GFX10-NEXT: v_xor_b32_e32 v3, s9, v3 2756; GFX10-NEXT: v_subrev_nc_u32_e32 v2, s10, v2 2757; GFX10-NEXT: s_movk_i32 s1, 0xff 2758; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s0, v1 2759; GFX10-NEXT: v_and_b32_sdwa v0, v0, s1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2760; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s9, v3 2761; GFX10-NEXT: 
; --- @sdiv_i16 ---------------------------------------------------------------
; This span first finishes the preceding <2 x i8> kernel (end of its GFX10
; checks and its sdiv/srem IR body), then holds all of @sdiv_i16, and ends with
; the opening GFX8 checks of the following <2 x i16> kernel.
; @sdiv_i16 takes two i16 addrspace(1) output pointers (%out0, %out1) and two
; i16 operands (%x, %y). Its IR body stores `sdiv i16 %x, %y` through %out0 and
; `srem i16 %x, %y` through %out1, so despite the "sdiv" name it exercises
; quotient and remainder together.
; In the expected output both operands come from a single dword load at offset
; 0x10 and pass through s_bfe_i32 (immediate 0x100010) / s_sext_i32_i16
; (presumably sign-extending each half to 32 bits) before a 32-bit sequence
; built around v_rcp_iflag_f32 with two compare/select correction steps.
; Results are written with flat_store_short on GFX8 and global_store_short on
; GFX9 and GFX10.
; The check lines are generated (see the note on the first line of the file);
; rerun utils/update_llc_test_checks.py instead of editing them by hand.
v_and_b32_sdwa v2, v2, s1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2762; GFX10-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2763; GFX10-NEXT: v_mov_b32_e32 v1, 0 2764; GFX10-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2765; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2766; GFX10-NEXT: global_store_short v1, v0, s[4:5] 2767; GFX10-NEXT: global_store_short v1, v2, s[6:7] 2768; GFX10-NEXT: s_endpgm 2769 %div = sdiv <2 x i8> %x, %y 2770 store <2 x i8> %div, <2 x i8> addrspace(1)* %out0 2771 %rem = srem <2 x i8> %x, %y 2772 store <2 x i8> %rem, <2 x i8> addrspace(1)* %out1 2773 ret void 2774} 2775 2776define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* %out1, i16 %x, i16 %y) { 2777; GFX8-LABEL: sdiv_i16: 2778; GFX8: ; %bb.0: 2779; GFX8-NEXT: s_load_dword s0, s[4:5], 0x10 2780; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2781; GFX8-NEXT: s_bfe_i32 s1, s0, 0x100010 2782; GFX8-NEXT: s_ashr_i32 s6, s1, 31 2783; GFX8-NEXT: s_add_i32 s1, s1, s6 2784; GFX8-NEXT: s_xor_b32 s7, s1, s6 2785; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s7 2786; GFX8-NEXT: s_sub_i32 s1, 0, s7 2787; GFX8-NEXT: s_sext_i32_i16 s0, s0 2788; GFX8-NEXT: s_ashr_i32 s8, s0, 31 2789; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 2790; GFX8-NEXT: s_add_i32 s0, s0, s8 2791; GFX8-NEXT: s_xor_b32 s9, s0, s8 2792; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2793; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 2794; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0 2795; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2796; GFX8-NEXT: s_xor_b32 s4, s8, s6 2797; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 2798; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 2799; GFX8-NEXT: v_mul_hi_u32 v2, s9, v0 2800; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2801; GFX8-NEXT: v_mov_b32_e32 v0, s0 2802; GFX8-NEXT: v_mov_b32_e32 v1, s1 2803; GFX8-NEXT: v_mul_lo_u32 v3, v2, s7 2804; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 2805; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s9, v3 
2806; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 2807; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2808; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3 2809; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2810; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 2811; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 2812; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2813; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3 2814; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2 2815; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2816; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s4, v2 2817; GFX8-NEXT: v_xor_b32_e32 v3, s8, v3 2818; GFX8-NEXT: flat_store_short v[0:1], v2 2819; GFX8-NEXT: v_mov_b32_e32 v0, s2 2820; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s8, v3 2821; GFX8-NEXT: v_mov_b32_e32 v1, s3 2822; GFX8-NEXT: flat_store_short v[0:1], v3 2823; GFX8-NEXT: s_endpgm 2824; 2825; GFX9-LABEL: sdiv_i16: 2826; GFX9: ; %bb.0: 2827; GFX9-NEXT: s_load_dword s0, s[4:5], 0x10 2828; GFX9-NEXT: v_mov_b32_e32 v2, 0 2829; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2830; GFX9-NEXT: s_bfe_i32 s1, s0, 0x100010 2831; GFX9-NEXT: s_ashr_i32 s6, s1, 31 2832; GFX9-NEXT: s_add_i32 s1, s1, s6 2833; GFX9-NEXT: s_xor_b32 s7, s1, s6 2834; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7 2835; GFX9-NEXT: s_sub_i32 s1, 0, s7 2836; GFX9-NEXT: s_sext_i32_i16 s0, s0 2837; GFX9-NEXT: s_ashr_i32 s8, s0, 31 2838; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 2839; GFX9-NEXT: s_add_i32 s0, s0, s8 2840; GFX9-NEXT: s_xor_b32 s9, s0, s8 2841; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2842; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 2843; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0 2844; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2845; GFX9-NEXT: s_xor_b32 s4, s8, s6 2846; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 2847; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 2848; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 2849; GFX9-NEXT: v_mul_lo_u32 v1, v0, s7 2850; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 2851; GFX9-NEXT: v_sub_u32_e32 v1, s9, v1 2852; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 2853; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, 
v3, vcc 2854; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 2855; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2856; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 2857; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 2858; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2859; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 2860; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 2861; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 2862; GFX9-NEXT: v_xor_b32_e32 v1, s8, v1 2863; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0 2864; GFX9-NEXT: v_subrev_u32_e32 v1, s8, v1 2865; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2866; GFX9-NEXT: global_store_short v2, v0, s[0:1] 2867; GFX9-NEXT: global_store_short v2, v1, s[2:3] 2868; GFX9-NEXT: s_endpgm 2869; 2870; GFX10-LABEL: sdiv_i16: 2871; GFX10: ; %bb.0: 2872; GFX10-NEXT: s_load_dword s0, s[4:5], 0x10 2873; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2874; GFX10-NEXT: s_bfe_i32 s1, s0, 0x100010 2875; GFX10-NEXT: s_sext_i32_i16 s0, s0 2876; GFX10-NEXT: s_ashr_i32 s6, s1, 31 2877; GFX10-NEXT: s_ashr_i32 s8, s0, 31 2878; GFX10-NEXT: s_add_i32 s1, s1, s6 2879; GFX10-NEXT: s_add_i32 s0, s0, s8 2880; GFX10-NEXT: s_xor_b32 s7, s1, s6 2881; GFX10-NEXT: s_xor_b32 s0, s0, s8 2882; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s7 2883; GFX10-NEXT: s_sub_i32 s1, 0, s7 2884; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 2885; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2886; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 2887; GFX10-NEXT: v_mul_lo_u32 v1, s1, v0 2888; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 2889; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 2890; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 2891; GFX10-NEXT: v_mul_lo_u32 v1, v0, s7 2892; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 2893; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 2894; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2895; GFX10-NEXT: s_xor_b32 s4, s8, s6 2896; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 2897; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 2898; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2899; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2900; GFX10-NEXT: 
v_add_nc_u32_e32 v2, 1, v0 2901; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 2902; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 2903; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2904; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2905; GFX10-NEXT: v_mov_b32_e32 v2, 0 2906; GFX10-NEXT: v_xor_b32_e32 v0, s4, v0 2907; GFX10-NEXT: v_xor_b32_e32 v1, s8, v1 2908; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s4, v0 2909; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s8, v1 2910; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2911; GFX10-NEXT: global_store_short v2, v0, s[0:1] 2912; GFX10-NEXT: global_store_short v2, v1, s[2:3] 2913; GFX10-NEXT: s_endpgm 2914 %div = sdiv i16 %x, %y 2915 store i16 %div, i16 addrspace(1)* %out0 2916 %rem = srem i16 %x, %y 2917 store i16 %rem, i16 addrspace(1)* %out1 2918 ret void 2919} 2920 2921define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> %x, <2 x i16> %y) { 2922; GFX8-LABEL: sdivrem_v2i16: 2923; GFX8: ; %bb.0: 2924; GFX8-NEXT: s_load_dword s0, s[4:5], 0x14 2925; GFX8-NEXT: s_load_dword s8, s[4:5], 0x10 2926; GFX8-NEXT: s_mov_b32 s9, 0x100010 2927; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2928; GFX8-NEXT: s_sext_i32_i16 s1, s0 2929; GFX8-NEXT: s_ashr_i32 s2, s1, 31 2930; GFX8-NEXT: s_add_i32 s1, s1, s2 2931; GFX8-NEXT: s_xor_b32 s3, s1, s2 2932; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s3 2933; GFX8-NEXT: s_sub_i32 s6, 0, s3 2934; GFX8-NEXT: s_sext_i32_i16 s1, s8 2935; GFX8-NEXT: s_bfe_i32 s0, s0, s9 2936; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 2937; GFX8-NEXT: s_ashr_i32 s10, s1, 31 2938; GFX8-NEXT: s_ashr_i32 s11, s0, 31 2939; GFX8-NEXT: s_add_i32 s1, s1, s10 2940; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2941; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 2942; GFX8-NEXT: s_add_i32 s0, s0, s11 2943; GFX8-NEXT: s_xor_b32 s12, s0, s11 2944; GFX8-NEXT: s_xor_b32 s1, s1, s10 2945; GFX8-NEXT: v_mul_lo_u32 v1, s6, v0 2946; GFX8-NEXT: v_cvt_f32_u32_e32 v2, s12 2947; GFX8-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 2948; GFX8-NEXT: 
v_mul_hi_u32 v1, v0, v1 2949; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 2950; GFX8-NEXT: v_mul_hi_u32 v0, s1, v0 2951; GFX8-NEXT: v_rcp_iflag_f32_e32 v1, v2 2952; GFX8-NEXT: v_mul_lo_u32 v2, v0, s3 2953; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 2954; GFX8-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 2955; GFX8-NEXT: v_cvt_u32_f32_e32 v1, v1 2956; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s1, v2 2957; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 2958; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2959; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s3, v2 2960; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2961; GFX8-NEXT: v_add_u32_e32 v3, vcc, 1, v0 2962; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v2 2963; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2964; GFX8-NEXT: v_subrev_u32_e64 v3, s[0:1], s3, v2 2965; GFX8-NEXT: s_sub_i32 s1, 0, s12 2966; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2967; GFX8-NEXT: v_mul_lo_u32 v3, s1, v1 2968; GFX8-NEXT: s_bfe_i32 s1, s8, s9 2969; GFX8-NEXT: s_xor_b32 s0, s10, s2 2970; GFX8-NEXT: s_ashr_i32 s2, s1, 31 2971; GFX8-NEXT: v_mul_hi_u32 v3, v1, v3 2972; GFX8-NEXT: s_add_i32 s1, s1, s2 2973; GFX8-NEXT: s_xor_b32 s1, s1, s2 2974; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0 2975; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 2976; GFX8-NEXT: v_mul_hi_u32 v1, s1, v1 2977; GFX8-NEXT: v_xor_b32_e32 v2, s10, v2 2978; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s0, v0 2979; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s10, v2 2980; GFX8-NEXT: v_mul_lo_u32 v3, v1, s12 2981; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 2982; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s1, v3 2983; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s12, v3 2984; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2985; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s12, v3 2986; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2987; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v1 2988; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s12, v3 2989; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2990; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s12, v3 2991; GFX8-NEXT: s_xor_b32 s0, s2, s11 2992; 
GFX8-NEXT: v_xor_b32_e32 v1, s0, v1 2993; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2994; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, s0, v1 2995; GFX8-NEXT: s_mov_b32 s0, 0xffff 2996; GFX8-NEXT: v_xor_b32_e32 v3, s2, v3 2997; GFX8-NEXT: v_and_b32_e32 v1, s0, v1 2998; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2999; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s2, v3 3000; GFX8-NEXT: v_or_b32_sdwa v4, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3001; GFX8-NEXT: v_and_b32_e32 v0, s0, v3 3002; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3003; GFX8-NEXT: v_or_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3004; GFX8-NEXT: s_waitcnt lgkmcnt(0) 3005; GFX8-NEXT: v_mov_b32_e32 v0, s4 3006; GFX8-NEXT: v_mov_b32_e32 v1, s5 3007; GFX8-NEXT: flat_store_dword v[0:1], v4 3008; GFX8-NEXT: v_mov_b32_e32 v0, s6 3009; GFX8-NEXT: v_mov_b32_e32 v1, s7 3010; GFX8-NEXT: flat_store_dword v[0:1], v2 3011; GFX8-NEXT: s_endpgm 3012; 3013; GFX9-LABEL: sdivrem_v2i16: 3014; GFX9: ; %bb.0: 3015; GFX9-NEXT: s_load_dword s6, s[4:5], 0x14 3016; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3017; GFX9-NEXT: s_sext_i32_i16 s0, s6 3018; GFX9-NEXT: s_ashr_i32 s7, s0, 31 3019; GFX9-NEXT: s_add_i32 s0, s0, s7 3020; GFX9-NEXT: s_xor_b32 s8, s0, s7 3021; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s8 3022; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3023; GFX9-NEXT: s_load_dword s9, s[4:5], 0x10 3024; GFX9-NEXT: s_mov_b32 s4, 0x100010 3025; GFX9-NEXT: s_bfe_i32 s6, s6, s4 3026; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 3027; GFX9-NEXT: s_ashr_i32 s10, s6, 31 3028; GFX9-NEXT: s_add_i32 s6, s6, s10 3029; GFX9-NEXT: s_xor_b32 s6, s6, s10 3030; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 3031; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 3032; GFX9-NEXT: s_sub_i32 s11, 0, s8 3033; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s6 3034; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3035; GFX9-NEXT: s_sext_i32_i16 s5, s9 3036; GFX9-NEXT: v_mul_lo_u32 v1, s11, v0 3037; GFX9-NEXT: s_ashr_i32 s11, s5, 31 3038; GFX9-NEXT: 
v_rcp_iflag_f32_e32 v2, v2 3039; GFX9-NEXT: s_add_i32 s5, s5, s11 3040; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 3041; GFX9-NEXT: s_xor_b32 s5, s5, s11 3042; GFX9-NEXT: s_bfe_i32 s4, s9, s4 3043; GFX9-NEXT: s_sub_i32 s9, 0, s6 3044; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 3045; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v2 3046; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 3047; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0 3048; GFX9-NEXT: s_xor_b32 s7, s11, s7 3049; GFX9-NEXT: v_mul_lo_u32 v3, s9, v1 3050; GFX9-NEXT: v_mul_lo_u32 v2, v0, s8 3051; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 3052; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 3053; GFX9-NEXT: v_sub_u32_e32 v2, s5, v2 3054; GFX9-NEXT: s_ashr_i32 s5, s4, 31 3055; GFX9-NEXT: s_add_i32 s4, s4, s5 3056; GFX9-NEXT: s_xor_b32 s4, s4, s5 3057; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 3058; GFX9-NEXT: v_mul_hi_u32 v1, s4, v1 3059; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 3060; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3061; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v2 3062; GFX9-NEXT: v_mul_lo_u32 v3, v1, s6 3063; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 3064; GFX9-NEXT: v_add_u32_e32 v4, 1, v0 3065; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 3066; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 3067; GFX9-NEXT: v_subrev_u32_e32 v4, s8, v2 3068; GFX9-NEXT: v_sub_u32_e32 v3, s4, v3 3069; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 3070; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 3071; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 3072; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 3073; GFX9-NEXT: v_subrev_u32_e32 v4, s6, v3 3074; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 3075; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 3076; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 3077; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 3078; GFX9-NEXT: v_subrev_u32_e32 v4, s6, v3 3079; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 3080; GFX9-NEXT: s_xor_b32 s4, s5, s10 3081; GFX9-NEXT: v_xor_b32_e32 v0, s7, v0 3082; GFX9-NEXT: v_xor_b32_e32 v2, s11, v2 3083; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1 3084; GFX9-NEXT: 
v_xor_b32_e32 v3, s5, v3 3085; GFX9-NEXT: v_subrev_u32_e32 v0, s7, v0 3086; GFX9-NEXT: v_subrev_u32_e32 v2, s11, v2 3087; GFX9-NEXT: v_sub_u32_sdwa v1, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 3088; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff 3089; GFX9-NEXT: v_sub_u32_sdwa v3, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 3090; GFX9-NEXT: v_and_or_b32 v0, v0, v4, v1 3091; GFX9-NEXT: v_and_or_b32 v1, v2, v4, v3 3092; GFX9-NEXT: v_mov_b32_e32 v2, 0 3093; GFX9-NEXT: global_store_dword v2, v0, s[0:1] 3094; GFX9-NEXT: global_store_dword v2, v1, s[2:3] 3095; GFX9-NEXT: s_endpgm 3096; 3097; GFX10-LABEL: sdivrem_v2i16: 3098; GFX10: ; %bb.0: 3099; GFX10-NEXT: s_load_dword s0, s[4:5], 0x14 3100; GFX10-NEXT: s_mov_b32 s1, 0x100010 3101; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3102; GFX10-NEXT: s_sext_i32_i16 s2, s0 3103; GFX10-NEXT: s_bfe_i32 s0, s0, s1 3104; GFX10-NEXT: s_ashr_i32 s3, s2, 31 3105; GFX10-NEXT: s_ashr_i32 s8, s0, 31 3106; GFX10-NEXT: s_add_i32 s2, s2, s3 3107; GFX10-NEXT: s_add_i32 s0, s0, s8 3108; GFX10-NEXT: s_xor_b32 s2, s2, s3 3109; GFX10-NEXT: s_xor_b32 s9, s0, s8 3110; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s2 3111; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s9 3112; GFX10-NEXT: s_load_dword s0, s[4:5], 0x10 3113; GFX10-NEXT: s_sub_i32 s6, 0, s2 3114; GFX10-NEXT: s_sub_i32 s7, 0, s9 3115; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 3116; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 3117; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 3118; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 3119; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 3120; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 3121; GFX10-NEXT: v_mul_lo_u32 v2, s6, v0 3122; GFX10-NEXT: v_mul_lo_u32 v3, s7, v1 3123; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3124; GFX10-NEXT: s_sext_i32_i16 s6, s0 3125; GFX10-NEXT: s_bfe_i32 s0, s0, s1 3126; GFX10-NEXT: s_ashr_i32 s1, s6, 31 3127; GFX10-NEXT: s_ashr_i32 s10, s0, 31 3128; GFX10-NEXT: s_add_i32 s6, s6, s1 3129; GFX10-NEXT: s_add_i32 s0, s0, s10 3130; 
GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 3131; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 3132; GFX10-NEXT: s_xor_b32 s6, s6, s1 3133; GFX10-NEXT: s_xor_b32 s0, s0, s10 3134; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 3135; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 3136; GFX10-NEXT: v_mul_hi_u32 v0, s6, v0 3137; GFX10-NEXT: v_mul_hi_u32 v1, s0, v1 3138; GFX10-NEXT: v_mul_lo_u32 v2, v0, s2 3139; GFX10-NEXT: v_mul_lo_u32 v3, v1, s9 3140; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 3141; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v1 3142; GFX10-NEXT: v_sub_nc_u32_e32 v2, s6, v2 3143; GFX10-NEXT: v_sub_nc_u32_e32 v3, s0, v3 3144; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 3145; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v2 3146; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s2, v2 3147; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s9, v3 3148; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s9, v3 3149; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 3150; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0 3151; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 3152; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo 3153; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 3154; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v2 3155; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v1 3156; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s9, v3 3157; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s2, v2 3158; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s9, v3 3159; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 3160; GFX10-NEXT: s_xor_b32 s2, s1, s3 3161; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 3162; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0 3163; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo 3164; GFX10-NEXT: s_xor_b32 s0, s10, s8 3165; GFX10-NEXT: v_xor_b32_e32 v0, s2, v0 3166; GFX10-NEXT: v_xor_b32_e32 v1, s0, v1 3167; GFX10-NEXT: v_xor_b32_e32 v2, s1, v2 3168; GFX10-NEXT: v_xor_b32_e32 v3, s10, v3 3169; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff 3170; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s2, v0 3171; GFX10-NEXT: v_sub_nc_u32_sdwa v1, v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:DWORD
; GFX10-NEXT: v_subrev_nc_u32_e32 v2, s1, v2
; GFX10-NEXT: v_sub_nc_u32_sdwa v3, v3, s10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX10-NEXT: v_and_or_b32 v0, v0, v4, v1
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_and_or_b32 v2, v2, v4, v3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_store_dword v1, v0, s[4:5]
; GFX10-NEXT: global_store_dword v1, v2, s[6:7]
; GFX10-NEXT: s_endpgm
  %div = sdiv <2 x i16> %x, %y
  store <2 x i16> %div, <2 x i16> addrspace(1)* %out0
  %rem = srem <2 x i16> %x, %y
  store <2 x i16> %rem, <2 x i16> addrspace(1)* %out1
  ret void
}

; Kernel sdivrem_i3 -- signed quotient and remainder of the same pair of i3
; operands. The quotient is stored through %out0 and the remainder through
; %out1 (both addrspace(1) pointers).
;
; What the expected output below pins down, per target section:
;   * both operands come from one dword load and are extracted with
;     s_bfe_i32 (immediates 0x30008 and 0x30000);
;   * a single v_rcp_iflag_f32-based expansion produces both results;
;   * each result is masked with 7 and written with a byte store.
;
; The assertions are autogenerated (see the NOTE on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
define amdgpu_kernel void @sdivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* %out1, i3 %x, i3 %y) {
; GFX8-LABEL: sdivrem_i3:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dword s0, s[4:5], 0x10
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_bfe_i32 s1, s0, 0x30008
; GFX8-NEXT: s_ashr_i32 s6, s1, 31
; GFX8-NEXT: s_add_i32 s1, s1, s6
; GFX8-NEXT: s_xor_b32 s7, s1, s6
; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s7
; GFX8-NEXT: s_sub_i32 s1, 0, s7
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x30000
; GFX8-NEXT: s_ashr_i32 s8, s0, 31
; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX8-NEXT: s_add_i32 s0, s0, s8
; GFX8-NEXT: s_xor_b32 s9, s0, s8
; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX8-NEXT: s_xor_b32 s4, s8, s6
; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT: v_mul_hi_u32 v2, s9, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_mul_lo_u32 v3, v2, s7
; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2
; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s9, v3
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3
; GFX8-NEXT: v_xor_b32_e32 v2, s4, v2
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s4, v2
; GFX8-NEXT: v_xor_b32_e32 v3, s8, v3
; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
; GFX8-NEXT: flat_store_byte v[0:1], v2
; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s8, v3
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_and_b32_e32 v2, 7, v3
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_store_byte v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: sdivrem_i3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x10
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_bfe_i32 s1, s0, 0x30008
; GFX9-NEXT: s_ashr_i32 s6, s1, 31
; GFX9-NEXT: s_add_i32 s1, s1, s6
; GFX9-NEXT: s_xor_b32 s7, s1, s6
; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7
; GFX9-NEXT: s_sub_i32 s1, 0, s7
; GFX9-NEXT: s_bfe_i32 s0, s0, 0x30000
; GFX9-NEXT: s_ashr_i32 s8, s0, 31
; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX9-NEXT: s_add_i32 s0, s0, s8
; GFX9-NEXT: s_xor_b32 s9, s0, s8
; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX9-NEXT: s_xor_b32 s4, s8, s6
; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0
; GFX9-NEXT: v_mul_lo_u32 v1, v0, s7
; GFX9-NEXT: v_add_u32_e32 v3, 1, v0
; GFX9-NEXT: v_sub_u32_e32 v1, s9, v1
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX9-NEXT: v_add_u32_e32 v3, 1, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0
; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0
; GFX9-NEXT: v_xor_b32_e32 v1, s8, v1
; GFX9-NEXT: v_and_b32_e32 v0, 7, v0
; GFX9-NEXT: v_subrev_u32_e32 v1, s8, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_store_byte v2, v0, s[0:1]
; GFX9-NEXT: v_and_b32_e32 v0, 7, v1
; GFX9-NEXT: global_store_byte v2, v0, s[2:3]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: sdivrem_i3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dword s0, s[4:5], 0x10
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_bfe_i32 s1, s0, 0x30008
; GFX10-NEXT: s_bfe_i32 s0, s0, 0x30000
; GFX10-NEXT: s_ashr_i32 s6, s1, 31
; GFX10-NEXT: s_ashr_i32 s7, s0, 31
; GFX10-NEXT: s_add_i32 s1, s1, s6
; GFX10-NEXT: s_add_i32 s0, s0, s7
; GFX10-NEXT: s_xor_b32 s1, s1, s6
; GFX10-NEXT: s_xor_b32 s0, s0, s7
; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s1
; GFX10-NEXT: s_sub_i32 s2, 0, s1
; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX10-NEXT: v_mul_lo_u32 v1, s2, v0
; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0
; GFX10-NEXT: v_mul_lo_u32 v1, v0, s1
; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0
; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1
; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1
; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-NEXT: s_xor_b32 s4, s7, s6
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: v_xor_b32_e32 v0, s4, v0
; GFX10-NEXT: v_xor_b32_e32 v1, s7, v1
; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s4, v0
; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s7, v1
; GFX10-NEXT: v_and_b32_e32 v0, 7, v0
; GFX10-NEXT: v_and_b32_e32 v1, 7, v1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_store_byte v2, v0, s[0:1]
; GFX10-NEXT: global_store_byte v2, v1, s[2:3]
; GFX10-NEXT: s_endpgm
  ; Quotient of %x by %y goes to %out0.
  %div = sdiv i3 %x, %y
  store i3 %div, i3 addrspace(1)* %out0
  ; Remainder of the same operands goes to %out1.
  %rem = srem i3 %x, %y
  store i3 %rem, i3 addrspace(1)* %out1
  ret void
}

; Kernel sdivrem_i27 -- signed quotient and remainder of the same pair of i27
; operands. The quotient is stored through %out0 and the remainder through
; %out1 (both addrspace(1) pointers).
;
; What the expected output below pins down, per target section:
;   * the operands arrive via one dwordx2 load and each is passed through
;     s_bfe_i32 with immediate 0x1b0000;
;   * a single v_rcp_iflag_f32-based expansion produces both results;
;   * each result is ANDed with 0x7ffffff (2^27 - 1) and written with a dword
;     store.
;
; The assertions are autogenerated (see the NOTE on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
define amdgpu_kernel void @sdivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) {
; GFX8-LABEL: sdivrem_i27:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
; GFX8-NEXT: s_mov_b32 s9, 0x7ffffff
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_bfe_i32 s1, s1, 0x1b0000
; GFX8-NEXT: s_ashr_i32 s2, s1, 31
; GFX8-NEXT: s_add_i32 s1, s1, s2
; GFX8-NEXT: s_xor_b32 s3, s1, s2
; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s3
; GFX8-NEXT: s_sub_i32 s1, 0, s3
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x1b0000
; GFX8-NEXT: s_ashr_i32 s8, s0, 31
; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX8-NEXT: s_add_i32 s0, s0, s8
; GFX8-NEXT: s_xor_b32 s0, s0, s8
; GFX8-NEXT: s_xor_b32 s2, s8, s2
; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0
; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0
; GFX8-NEXT: v_mul_lo_u32 v1, v0, s3
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v0
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s0, v1
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v1
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT: v_subrev_u32_e64 v2, s[0:1], s3, v1
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v1
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT: v_subrev_u32_e64 v2, s[0:1], s3, v1
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX8-NEXT: v_xor_b32_e32 v0, s2, v0
; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s2, v0
; GFX8-NEXT: v_xor_b32_e32 v1, s8, v1
; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s8, v1
; GFX8-NEXT: v_and_b32_e32 v3, s9, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s5
; GFX8-NEXT: flat_store_dword v[0:1], v3
; GFX8-NEXT: v_mov_b32_e32 v0, s6
; GFX8-NEXT: v_and_b32_e32 v2, s9, v2
; GFX8-NEXT: v_mov_b32_e32 v1, s7
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: sdivrem_i27:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_bfe_i32 s1, s1, 0x1b0000
; GFX9-NEXT: s_ashr_i32 s6, s1, 31
; GFX9-NEXT: s_add_i32 s1, s1, s6
; GFX9-NEXT: s_xor_b32 s7, s1, s6
; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7
; GFX9-NEXT: s_sub_i32 s1, 0, s7
; GFX9-NEXT: s_bfe_i32 s0, s0, 0x1b0000
; GFX9-NEXT: s_ashr_i32 s8, s0, 31
; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX9-NEXT: s_add_i32 s0, s0, s8
; GFX9-NEXT: s_xor_b32 s9, s0, s8
; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX9-NEXT: s_xor_b32 s5, s8, s6
; GFX9-NEXT: s_mov_b32 s4, 0x7ffffff
; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0
; GFX9-NEXT: v_mul_lo_u32 v1, v0, s7
; GFX9-NEXT: v_add_u32_e32 v3, 1, v0
; GFX9-NEXT: v_sub_u32_e32 v1, s9, v1
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX9-NEXT: v_add_u32_e32 v3, 1, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX9-NEXT: v_xor_b32_e32 v0, s5, v0
; GFX9-NEXT: v_subrev_u32_e32 v0, s5, v0
; GFX9-NEXT: v_xor_b32_e32 v1, s8, v1
; GFX9-NEXT: v_and_b32_e32 v0, s4, v0
; GFX9-NEXT: v_subrev_u32_e32 v1, s8, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: v_and_b32_e32 v0, s4, v1
; GFX9-NEXT: global_store_dword v2, v0, s[2:3]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: sdivrem_i27:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_bfe_i32 s1, s1, 0x1b0000
; GFX10-NEXT: s_bfe_i32 s0, s0, 0x1b0000
; GFX10-NEXT: s_ashr_i32 s6, s1, 31
; GFX10-NEXT: s_ashr_i32 s7, s0, 31
; GFX10-NEXT: s_add_i32 s1, s1, s6
; GFX10-NEXT: s_add_i32 s0, s0, s7
; GFX10-NEXT: s_xor_b32 s1, s1, s6
; GFX10-NEXT: s_xor_b32 s0, s0, s7
; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s1
; GFX10-NEXT: s_sub_i32 s2, 0, s1
; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX10-NEXT: v_mul_lo_u32 v1, s2, v0
; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0
; GFX10-NEXT: v_mul_lo_u32 v1, v0, s1
; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0
; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1
; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1
; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-NEXT: s_xor_b32 s4, s7, s6
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: v_xor_b32_e32 v0, s4, v0
; GFX10-NEXT: v_xor_b32_e32 v1, s7, v1
; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s4, v0
; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s7, v1
; GFX10-NEXT: s_mov_b32 s4, 0x7ffffff
; GFX10-NEXT: v_and_b32_e32 v0, s4, v0
; GFX10-NEXT: v_and_b32_e32 v1, s4, v1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_store_dword v2, v0, s[0:1]
; GFX10-NEXT: global_store_dword v2, v1, s[2:3]
; GFX10-NEXT: s_endpgm
  ; Quotient of %x by %y goes to %out0.
  %div = sdiv i27 %x, %y
  store i27 %div, i27 addrspace(1)* %out0
  ; Remainder of the same operands goes to %out1.
  %rem = srem i27 %x, %y
  store i27 %rem, i27 addrspace(1)* %out1
  ret void
}