; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Code generation tests for 128-bit shl/lshr/ashr on the amdgcn-amd-amdhsa
; target. The assertion lines are machine generated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

; ---------------------------------------------------------------------------
; i128 shifted by an i128 amount; both operands are function arguments.
; ---------------------------------------------------------------------------

define i128 @v_shl_i128_vv(i128 %lhs, i128 %rhs) {
; GCN-LABEL: v_shl_i128_vv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_b64 v[5:6], v[2:3], v4
; GCN-NEXT:    v_sub_i32_e32 v9, vcc, 64, v4
; GCN-NEXT:    v_subrev_i32_e32 v11, vcc, 64, v4
; GCN-NEXT:    v_lshl_b64 v[7:8], v[0:1], v4
; GCN-NEXT:    v_lshr_b64 v[9:10], v[0:1], v9
; GCN-NEXT:    v_lshl_b64 v[0:1], v[0:1], v11
; GCN-NEXT:    v_or_b32_e32 v6, v6, v10
; GCN-NEXT:    v_or_b32_e32 v5, v5, v9
; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
; GCN-NEXT:    v_cndmask_b32_e32 v6, v1, v6, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v8, vcc
; GCN-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v4
; GCN-NEXT:    v_cndmask_b32_e64 v3, v6, v3, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e64 v2, v0, v2, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v7, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i128 %lhs, %rhs
  ret i128 %shl
}

define i128 @v_lshr_i128_vv(i128 %lhs, i128 %rhs) {
; GCN-LABEL: v_lshr_i128_vv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshr_b64 v[5:6], v[0:1], v4
; GCN-NEXT:    v_sub_i32_e32 v9, vcc, 64, v4
; GCN-NEXT:    v_subrev_i32_e32 v11, vcc, 64, v4
; GCN-NEXT:    v_lshr_b64 v[7:8], v[2:3], v4
; GCN-NEXT:    v_lshl_b64 v[9:10], v[2:3], v9
; GCN-NEXT:    v_lshr_b64 v[2:3], v[2:3], v11
; GCN-NEXT:    v_or_b32_e32 v6, v6, v10
; GCN-NEXT:    v_or_b32_e32 v5, v5, v9
; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
; GCN-NEXT:    v_cndmask_b32_e32 v6, v3, v6, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v8, vcc
; GCN-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v4
; GCN-NEXT:    v_cndmask_b32_e64 v1, v6, v1, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v7, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]

  %shl = lshr i128 %lhs, %rhs
  ret i128 %shl
}

define i128 @v_ashr_i128_vv(i128 %lhs, i128 %rhs) {
; GCN-LABEL: v_ashr_i128_vv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
; GCN-NEXT:    v_ashr_i64 v[5:6], v[2:3], v4
; GCN-NEXT:    v_lshr_b64 v[7:8], v[0:1], v4
; GCN-NEXT:    v_sub_i32_e32 v10, vcc, 64, v4
; GCN-NEXT:    v_subrev_i32_e32 v11, vcc, 64, v4
; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
; GCN-NEXT:    v_cndmask_b32_e32 v6, v9, v6, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc
; GCN-NEXT:    v_lshl_b64 v[9:10], v[2:3], v10
; GCN-NEXT:    v_ashr_i64 v[2:3], v[2:3], v11
; GCN-NEXT:    v_or_b32_e32 v8, v8, v10
; GCN-NEXT:    v_or_b32_e32 v7, v7, v9
; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v7, vcc
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v2, v5
; GCN-NEXT:    v_mov_b32_e32 v3, v6
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = ashr i128 %lhs, %rhs
  ret i128 %shl
}


; ---------------------------------------------------------------------------
; i128 argument shifted by a constant amount (17, 65 and 33 respectively).
; ---------------------------------------------------------------------------

define i128 @v_shl_i128_vk(i128 %lhs) {
; GCN-LABEL: v_shl_i128_vk:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_b64 v[2:3], v[2:3], 17
; GCN-NEXT:    v_lshrrev_b32_e32 v4, 15, v1
; GCN-NEXT:    v_or_b32_e32 v2, v2, v4
; GCN-NEXT:    v_lshl_b64 v[0:1], v[0:1], 17
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i128 %lhs, 17
  ret i128 %shl
}

define i128 @v_lshr_i128_vk(i128 %lhs) {
; GCN-LABEL: v_lshr_i128_vk:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshr_b64 v[0:1], v[2:3], 1
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = lshr i128 %lhs, 65
  ret i128 %shl
}

define i128 @v_ashr_i128_vk(i128 %lhs) {
; GCN-LABEL: v_ashr_i128_vk:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_b64 v[4:5], v[2:3], 31
; GCN-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
; GCN-NEXT:    v_or_b32_e32 v4, v0, v4
; GCN-NEXT:    v_ashr_i64 v[2:3], v[2:3], 33
; GCN-NEXT:    v_mov_b32_e32 v0, v4
; GCN-NEXT:    v_mov_b32_e32 v1, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = ashr i128 %lhs, 33
  ret i128 %shl
}

; ---------------------------------------------------------------------------
; Constant i128 value (17, 65 and 33 respectively) shifted by an i128
; argument.
; ---------------------------------------------------------------------------

define i128 @v_shl_i128_kv(i128 %rhs) {
; GCN-LABEL: v_shl_i128_kv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sub_i32_e32 v1, vcc, 64, v0
; GCN-NEXT:    v_subrev_i32_e32 v3, vcc, 64, v0
; GCN-NEXT:    v_lshl_b64 v[4:5], 17, v0
; GCN-NEXT:    v_lshr_b64 v[1:2], 17, v1
; GCN-NEXT:    v_lshl_b64 v[6:7], 17, v3
; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v0
; GCN-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v6, v6, v1, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
; GCN-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v0
; GCN-NEXT:    v_cndmask_b32_e64 v3, 0, v2, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, v6, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i128 17, %rhs
  ret i128 %shl
}

define i128 @v_lshr_i128_kv(i128 %rhs) {
; GCN-LABEL: v_lshr_i128_kv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s7, 0
; GCN-NEXT:    s_movk_i32 s6, 0x41
; GCN-NEXT:    v_mov_b32_e32 v3, 0x41
; GCN-NEXT:    v_lshr_b64 v[1:2], s[6:7], v0
; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v0
; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = lshr i128 65, %rhs
  ret i128 %shl
}

define i128 @v_ashr_i128_kv(i128 %rhs) {
; GCN-LABEL: v_ashr_i128_kv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshr_b64 v[1:2], 33, v0
; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v0
; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v1, vcc
; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v0, 33, v3, vcc
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = ashr i128 33, %rhs
  ret i128 %shl
}

; ---------------------------------------------------------------------------
; amdgpu_kernel entry points: both i128 operands are kernel arguments and the
; result is stored through a null addrspace(1) pointer.
; ---------------------------------------------------------------------------

define amdgpu_kernel void @s_shl_i128_ss(i128 %lhs, i128 %rhs) {
; GCN-LABEL: s_shl_i128_ss:
; GCN:       .amd_kernel_code_t
; GCN-NEXT:    amd_code_version_major = 1
; GCN-NEXT:    amd_code_version_minor = 2
; GCN-NEXT:    amd_machine_kind = 1
; GCN-NEXT:    amd_machine_version_major = 7
; GCN-NEXT:    amd_machine_version_minor = 0
; GCN-NEXT:    amd_machine_version_stepping = 0
; GCN-NEXT:    kernel_code_entry_byte_offset = 256
; GCN-NEXT:    kernel_code_prefetch_byte_size = 0
; GCN-NEXT:    granulated_workitem_vgpr_count = 1
; GCN-NEXT:    granulated_wavefront_sgpr_count = 1
; GCN-NEXT:    priority = 0
; GCN-NEXT:    float_mode = 192
; GCN-NEXT:    priv = 0
; GCN-NEXT:    enable_dx10_clamp = 1
; GCN-NEXT:    debug_mode = 0
; GCN-NEXT:    enable_ieee_mode = 1
; GCN-NEXT:    enable_sgpr_private_segment_wave_byte_offset = 0
; GCN-NEXT:    user_sgpr_count = 6
; GCN-NEXT:    enable_trap_handler = 0
; GCN-NEXT:    enable_sgpr_workgroup_id_x = 1
; GCN-NEXT:    enable_sgpr_workgroup_id_y = 0
; GCN-NEXT:    enable_sgpr_workgroup_id_z = 0
; GCN-NEXT:    enable_sgpr_workgroup_info = 0
; GCN-NEXT:    enable_vgpr_workitem_id = 0
; GCN-NEXT:    enable_exception_msb = 0
; GCN-NEXT:    granulated_lds_size = 0
; GCN-NEXT:    enable_exception = 0
; GCN-NEXT:    enable_sgpr_private_segment_buffer = 1
; GCN-NEXT:    enable_sgpr_dispatch_ptr = 0
; GCN-NEXT:    enable_sgpr_queue_ptr = 0
; GCN-NEXT:    enable_sgpr_kernarg_segment_ptr = 1
; GCN-NEXT:    enable_sgpr_dispatch_id = 0
; GCN-NEXT:    enable_sgpr_flat_scratch_init = 0
; GCN-NEXT:    enable_sgpr_private_segment_size = 0
; GCN-NEXT:    enable_sgpr_grid_workgroup_count_x = 0
; GCN-NEXT:    enable_sgpr_grid_workgroup_count_y = 0
; GCN-NEXT:    enable_sgpr_grid_workgroup_count_z = 0
; GCN-NEXT:    enable_ordered_append_gds = 0
; GCN-NEXT:    private_element_size = 1
; GCN-NEXT:    is_ptr64 = 1
; GCN-NEXT:    is_dynamic_callstack = 0
; GCN-NEXT:    is_debug_enabled = 0
; GCN-NEXT:    is_xnack_enabled = 0
; GCN-NEXT:    workitem_private_segment_byte_size = 0
; GCN-NEXT:    workgroup_group_segment_byte_size = 0
; GCN-NEXT:    gds_segment_byte_size = 0
; GCN-NEXT:    kernarg_segment_byte_size = 32
; GCN-NEXT:    workgroup_fbarrier_count = 0
; GCN-NEXT:    wavefront_sgpr_count = 15
; GCN-NEXT:    workitem_vgpr_count = 8
; GCN-NEXT:    reserved_vgpr_first = 0
; GCN-NEXT:    reserved_vgpr_count = 0
; GCN-NEXT:    reserved_sgpr_first = 0
; GCN-NEXT:    reserved_sgpr_count = 0
; GCN-NEXT:    debug_wavefront_private_segment_offset_sgpr = 0
; GCN-NEXT:    debug_private_segment_buffer_sgpr = 0
; GCN-NEXT:    kernarg_segment_alignment = 4
; GCN-NEXT:    group_segment_alignment = 4
; GCN-NEXT:    private_segment_alignment = 4
; GCN-NEXT:    wavefront_size = 6
; GCN-NEXT:    call_convention = -1
; GCN-NEXT:    runtime_loader_kernel_symbol = 0
; GCN-NEXT:    .end_amd_kernel_code_t
; GCN-NEXT:  ; %bb.0:
; GCN-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshl_b64 s[6:7], s[2:3], s4
; GCN-NEXT:    s_sub_i32 s5, 64, s4
; GCN-NEXT:    s_sub_i32 s12, s4, 64
; GCN-NEXT:    s_lshl_b64 s[8:9], s[0:1], s4
; GCN-NEXT:    s_lshr_b64 s[10:11], s[0:1], s5
; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], s12
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    v_mov_b32_e32 v1, s9
; GCN-NEXT:    v_mov_b32_e32 v4, s8
; GCN-NEXT:    v_mov_b32_e32 v3, s1
; GCN-NEXT:    v_mov_b32_e32 v5, s0
; GCN-NEXT:    v_mov_b32_e32 v6, s7
; GCN-NEXT:    v_cmp_lt_u32_e64 vcc, s4, 64
; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
; GCN-NEXT:    v_mov_b32_e32 v6, s6
; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], s4, 0
; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, v0, s[0:1]
; GCN-NEXT:    v_cndmask_b32_e32 v5, v5, v6, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v4, vcc
; GCN-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v4, 0
; GCN-NEXT:    v_mov_b32_e32 v5, 0
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
  %shift = shl i128 %lhs, %rhs
  store i128 %shift, i128 addrspace(1)* null
  ret void
}

define amdgpu_kernel void @s_lshr_i128_ss(i128 %lhs, i128 %rhs) {
; GCN-LABEL: s_lshr_i128_ss:
; GCN:       .amd_kernel_code_t
; GCN-NEXT:    amd_code_version_major = 1
; GCN-NEXT:    amd_code_version_minor = 2
; GCN-NEXT:    amd_machine_kind = 1
; GCN-NEXT:    amd_machine_version_major = 7
; GCN-NEXT:    amd_machine_version_minor = 0
; GCN-NEXT:    amd_machine_version_stepping = 0
; GCN-NEXT:    kernel_code_entry_byte_offset = 256
; GCN-NEXT:    kernel_code_prefetch_byte_size = 0
; GCN-NEXT:    granulated_workitem_vgpr_count = 1
; GCN-NEXT:    granulated_wavefront_sgpr_count = 1
; GCN-NEXT:    priority = 0
; GCN-NEXT:    float_mode = 192
; GCN-NEXT:    priv = 0
; GCN-NEXT:    enable_dx10_clamp = 1
; GCN-NEXT:    debug_mode = 0
; GCN-NEXT:    enable_ieee_mode = 1
; GCN-NEXT:    enable_sgpr_private_segment_wave_byte_offset = 0
; GCN-NEXT:    user_sgpr_count = 6
; GCN-NEXT:    enable_trap_handler = 0
; GCN-NEXT:    enable_sgpr_workgroup_id_x = 1
; GCN-NEXT:    enable_sgpr_workgroup_id_y = 0
; GCN-NEXT:    enable_sgpr_workgroup_id_z = 0
; GCN-NEXT:    enable_sgpr_workgroup_info = 0
; GCN-NEXT:    enable_vgpr_workitem_id = 0
; GCN-NEXT:    enable_exception_msb = 0
; GCN-NEXT:    granulated_lds_size = 0
; GCN-NEXT:    enable_exception = 0
; GCN-NEXT:    enable_sgpr_private_segment_buffer = 1
; GCN-NEXT:    enable_sgpr_dispatch_ptr = 0
; GCN-NEXT:    enable_sgpr_queue_ptr = 0
; GCN-NEXT:    enable_sgpr_kernarg_segment_ptr = 1
; GCN-NEXT:    enable_sgpr_dispatch_id = 0
; GCN-NEXT:    enable_sgpr_flat_scratch_init = 0
; GCN-NEXT:    enable_sgpr_private_segment_size = 0
; GCN-NEXT:    enable_sgpr_grid_workgroup_count_x = 0
; GCN-NEXT:    enable_sgpr_grid_workgroup_count_y = 0
; GCN-NEXT:    enable_sgpr_grid_workgroup_count_z = 0
; GCN-NEXT:    enable_ordered_append_gds = 0
; GCN-NEXT:    private_element_size = 1
; GCN-NEXT:    is_ptr64 = 1
; GCN-NEXT:    is_dynamic_callstack = 0
; GCN-NEXT:    is_debug_enabled = 0
; GCN-NEXT:    is_xnack_enabled = 0
; GCN-NEXT:    workitem_private_segment_byte_size = 0
; GCN-NEXT:    workgroup_group_segment_byte_size = 0
; GCN-NEXT:    gds_segment_byte_size = 0
; GCN-NEXT:    kernarg_segment_byte_size = 32
; GCN-NEXT:    workgroup_fbarrier_count = 0
; GCN-NEXT:    wavefront_sgpr_count = 15
; GCN-NEXT:    workitem_vgpr_count = 8
; GCN-NEXT:    reserved_vgpr_first = 0
; GCN-NEXT:    reserved_vgpr_count = 0
; GCN-NEXT:    reserved_sgpr_first = 0
; GCN-NEXT:    reserved_sgpr_count = 0
; GCN-NEXT:    debug_wavefront_private_segment_offset_sgpr = 0
; GCN-NEXT:    debug_private_segment_buffer_sgpr = 0
; GCN-NEXT:    kernarg_segment_alignment = 4
; GCN-NEXT:    group_segment_alignment = 4
; GCN-NEXT:    private_segment_alignment = 4
; GCN-NEXT:    wavefront_size = 6
; GCN-NEXT:    call_convention = -1
; GCN-NEXT:    runtime_loader_kernel_symbol = 0
; GCN-NEXT:    .end_amd_kernel_code_t
; GCN-NEXT:  ; %bb.0:
; GCN-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshr_b64 s[6:7], s[0:1], s4
; GCN-NEXT:    s_sub_i32 s5, 64, s4
; GCN-NEXT:    s_sub_i32 s12, s4, 64
; GCN-NEXT:    s_lshr_b64 s[8:9], s[2:3], s4
; GCN-NEXT:    s_lshl_b64 s[10:11], s[2:3], s5
; GCN-NEXT:    s_lshr_b64 s[2:3], s[2:3], s12
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
; GCN-NEXT:    v_mov_b32_e32 v0, s1
; GCN-NEXT:    v_mov_b32_e32 v4, s0
; GCN-NEXT:    v_mov_b32_e32 v2, s9
; GCN-NEXT:    v_mov_b32_e32 v5, s8
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_mov_b32_e32 v3, s2
; GCN-NEXT:    v_mov_b32_e32 v6, s7
; GCN-NEXT:    v_cmp_lt_u32_e64 vcc, s4, 64
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
; GCN-NEXT:    v_mov_b32_e32 v6, s6
; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], s4, 0
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v0, s[0:1]
; GCN-NEXT:    v_cndmask_b32_e32 v0, v3, v6, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v5, vcc
; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v4, 0
; GCN-NEXT:    v_mov_b32_e32 v5, 0
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
  %shift = lshr i128 %lhs, %rhs
  store i128 %shift, i128 addrspace(1)* null
  ret void
}

define amdgpu_kernel void @s_ashr_i128_ss(i128 %lhs, i128 %rhs) {
; GCN-LABEL: s_ashr_i128_ss:
; GCN:       .amd_kernel_code_t
; GCN-NEXT:    amd_code_version_major = 1
; GCN-NEXT:    amd_code_version_minor = 2
; GCN-NEXT:    amd_machine_kind = 1
; GCN-NEXT:    amd_machine_version_major = 7
; GCN-NEXT:    amd_machine_version_minor = 0
; GCN-NEXT:    amd_machine_version_stepping = 0
; GCN-NEXT:    kernel_code_entry_byte_offset = 256
; GCN-NEXT:    kernel_code_prefetch_byte_size = 0
; GCN-NEXT:    granulated_workitem_vgpr_count = 1
; GCN-NEXT:    granulated_wavefront_sgpr_count = 1
; GCN-NEXT:    priority = 0
; GCN-NEXT:    float_mode = 192
; GCN-NEXT:    priv = 0
; GCN-NEXT:    enable_dx10_clamp = 1
; GCN-NEXT:    debug_mode = 0
; GCN-NEXT:    enable_ieee_mode = 1
; GCN-NEXT:    enable_sgpr_private_segment_wave_byte_offset = 0
; GCN-NEXT:    user_sgpr_count = 6
; GCN-NEXT:    enable_trap_handler = 0
; GCN-NEXT:    enable_sgpr_workgroup_id_x = 1
; GCN-NEXT:    enable_sgpr_workgroup_id_y = 0
; GCN-NEXT:    enable_sgpr_workgroup_id_z = 0
; GCN-NEXT:    enable_sgpr_workgroup_info = 0
; GCN-NEXT:    enable_vgpr_workitem_id = 0
; GCN-NEXT:    enable_exception_msb = 0
; GCN-NEXT:    granulated_lds_size = 0
; GCN-NEXT:    enable_exception = 0
; GCN-NEXT:    enable_sgpr_private_segment_buffer = 1
; GCN-NEXT:    enable_sgpr_dispatch_ptr = 0
; GCN-NEXT:    enable_sgpr_queue_ptr = 0
; GCN-NEXT:    enable_sgpr_kernarg_segment_ptr = 1
; GCN-NEXT:    enable_sgpr_dispatch_id = 0
; GCN-NEXT:    enable_sgpr_flat_scratch_init = 0
; GCN-NEXT:    enable_sgpr_private_segment_size = 0
; GCN-NEXT:    enable_sgpr_grid_workgroup_count_x = 0
; GCN-NEXT:    enable_sgpr_grid_workgroup_count_y = 0
; GCN-NEXT:    enable_sgpr_grid_workgroup_count_z = 0
; GCN-NEXT:    enable_ordered_append_gds = 0
; GCN-NEXT:    private_element_size = 1
; GCN-NEXT:    is_ptr64 = 1
; GCN-NEXT:    is_dynamic_callstack = 0
; GCN-NEXT:    is_debug_enabled = 0
; GCN-NEXT:    is_xnack_enabled = 0
; GCN-NEXT:    workitem_private_segment_byte_size = 0
; GCN-NEXT:    workgroup_group_segment_byte_size = 0
; GCN-NEXT:    gds_segment_byte_size = 0
; GCN-NEXT:    kernarg_segment_byte_size = 32
; GCN-NEXT:    workgroup_fbarrier_count = 0
; GCN-NEXT:    wavefront_sgpr_count = 16
; GCN-NEXT:    workitem_vgpr_count = 8
; GCN-NEXT:    reserved_vgpr_first = 0
; GCN-NEXT:    reserved_vgpr_count = 0
; GCN-NEXT:    reserved_sgpr_first = 0
; GCN-NEXT:    reserved_sgpr_count = 0
; GCN-NEXT:    debug_wavefront_private_segment_offset_sgpr = 0
; GCN-NEXT:    debug_private_segment_buffer_sgpr = 0
; GCN-NEXT:    kernarg_segment_alignment = 4
; GCN-NEXT:    group_segment_alignment = 4
; GCN-NEXT:    private_segment_alignment = 4
; GCN-NEXT:    wavefront_size = 6
; GCN-NEXT:    call_convention = -1
; GCN-NEXT:    runtime_loader_kernel_symbol = 0
; GCN-NEXT:    .end_amd_kernel_code_t
; GCN-NEXT:  ; %bb.0:
; GCN-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshr_b64 s[6:7], s[0:1], s4
; GCN-NEXT:    s_sub_i32 s5, 64, s4
; GCN-NEXT:    s_sub_i32 s12, s4, 64
; GCN-NEXT:    s_ashr_i64 s[8:9], s[2:3], s4
; GCN-NEXT:    s_ashr_i32 s13, s3, 31
; GCN-NEXT:    s_lshl_b64 s[10:11], s[2:3], s5
; GCN-NEXT:    s_ashr_i64 s[2:3], s[2:3], s12
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
; GCN-NEXT:    v_mov_b32_e32 v0, s1
; GCN-NEXT:    v_mov_b32_e32 v4, s0
; GCN-NEXT:    v_mov_b32_e32 v2, s13
; GCN-NEXT:    v_mov_b32_e32 v3, s9
; GCN-NEXT:    v_mov_b32_e32 v5, s8
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    v_mov_b32_e32 v6, s2
; GCN-NEXT:    v_mov_b32_e32 v7, s7
; GCN-NEXT:    v_cmp_lt_u32_e64 vcc, s4, 64
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GCN-NEXT:    v_mov_b32_e32 v7, s6
; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], s4, 0
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v0, s[0:1]
; GCN-NEXT:    v_cndmask_b32_e32 v0, v6, v7, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v3, v2, v3, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v4, 0
; GCN-NEXT:    v_mov_b32_e32 v5, 0
; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT:    s_endpgm
  %shift = ashr i128 %lhs, %rhs
  store i128 %shift, i128 addrspace(1)* null
  ret void
}

; ---------------------------------------------------------------------------
; <2 x i128> vector shifted element-wise by a <2 x i128> amount; both operands
; are function arguments.
; ---------------------------------------------------------------------------

define <2 x i128> @v_shl_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GCN-LABEL: v_shl_v2i128_vv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_b64 v[16:17], v[2:3], v8
; GCN-NEXT:    v_sub_i32_e32 v18, vcc, 64, v8
; GCN-NEXT:    v_lshr_b64 v[18:19], v[0:1], v18
; GCN-NEXT:    v_or_b32_e32 v20, v17, v19
; GCN-NEXT:    v_or_b32_e32 v21, v16, v18
; GCN-NEXT:    v_sub_i32_e32 v16, vcc, 64, v12
; GCN-NEXT:    v_lshr_b64 v[16:17], v[4:5], v16
; GCN-NEXT:    v_lshl_b64 v[18:19], v[6:7], v12
; GCN-NEXT:    v_or_b32_e32 v17, v19, v17
; GCN-NEXT:    v_or_b32_e32 v16, v18, v16
; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[10:11]
; GCN-NEXT:    v_or_b32_e32 v11, v9, v11
; GCN-NEXT:    v_or_b32_e32 v10, v8, v10
; GCN-NEXT:    v_cmp_eq_u64_e64 s[8:9], 0, v[14:15]
; GCN-NEXT:    v_or_b32_e32 v15, v13, v15
; GCN-NEXT:    v_or_b32_e32 v14, v12, v14
; GCN-NEXT:    v_cmp_gt_u64_e64 s[10:11], 64, v[8:9]
; GCN-NEXT:    v_subrev_i32_e32 v18, vcc, 64, v8
; GCN-NEXT:    v_lshl_b64 v[8:9], v[0:1], v8
; GCN-NEXT:    v_lshl_b64 v[0:1], v[0:1], v18
; GCN-NEXT:    v_cmp_gt_u64_e64 s[12:13], 64, v[12:13]
; GCN-NEXT:    v_subrev_i32_e32 v18, vcc, 64, v12
; GCN-NEXT:    v_lshl_b64 v[12:13], v[4:5], v12
; GCN-NEXT:    v_lshl_b64 v[4:5], v[4:5], v18
; GCN-NEXT:    s_and_b64 vcc, s[6:7], s[10:11]
; GCN-NEXT:    v_cndmask_b32_e32 v18, v1, v20, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v19, v0, v21, vcc
; GCN-NEXT:    s_and_b64 s[6:7], s[8:9], s[12:13]
; GCN-NEXT:    v_cndmask_b32_e64 v17, v5, v17, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, v16, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v9, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v8, vcc
; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, v13, s[6:7]
; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
; GCN-NEXT:    v_cndmask_b32_e32 v3, v18, v3, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v19, v2, vcc
; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
; GCN-NEXT:    v_cndmask_b32_e32 v7, v17, v7, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v6, v4, v6, vcc
; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, v12, s[6:7]
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl <2 x i128> %lhs, %rhs
  ret <2 x i128> %shl
}

define <2 x i128> @v_lshr_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GCN-LABEL: v_lshr_v2i128_vv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshr_b64 v[16:17], v[0:1], v8
; GCN-NEXT:    v_sub_i32_e32 v18, vcc, 64, v8
; GCN-NEXT:    v_lshl_b64 v[18:19], v[2:3], v18
; GCN-NEXT:    v_or_b32_e32 v20, v17, v19
; GCN-NEXT:    v_or_b32_e32 v21, v16, v18
; GCN-NEXT:    v_sub_i32_e32 v16, vcc, 64, v12
; GCN-NEXT:    v_lshl_b64 v[16:17], v[6:7], v16
; GCN-NEXT:    v_lshr_b64 v[18:19], v[4:5], v12
; GCN-NEXT:    v_or_b32_e32 v17, v19, v17
; GCN-NEXT:    v_or_b32_e32 v16, v18, v16
; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[10:11]
; GCN-NEXT:    v_or_b32_e32 v11, v9, v11
; GCN-NEXT:    v_or_b32_e32 v10, v8, v10
; GCN-NEXT:    v_cmp_eq_u64_e64 s[8:9], 0, v[14:15]
; GCN-NEXT:    v_or_b32_e32 v15, v13, v15
; GCN-NEXT:    v_or_b32_e32 v14, v12, v14
; GCN-NEXT:    v_cmp_gt_u64_e64 s[10:11], 64, v[8:9]
; GCN-NEXT:    v_subrev_i32_e32 v18, vcc, 64, v8
; GCN-NEXT:    v_lshr_b64 v[8:9], v[2:3], v8
; GCN-NEXT:    v_lshr_b64 v[2:3], v[2:3], v18
; GCN-NEXT:    v_cmp_gt_u64_e64 s[12:13], 64, v[12:13]
; GCN-NEXT:    v_subrev_i32_e32 v18, vcc, 64, v12
; GCN-NEXT:    v_lshr_b64 v[12:13], v[6:7], v12
; GCN-NEXT:    v_lshr_b64 v[6:7], v[6:7], v18
; GCN-NEXT:    s_and_b64 vcc, s[6:7], s[10:11]
; GCN-NEXT:    v_cndmask_b32_e32 v18, v3, v20, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v19, v2, v21, vcc
; GCN-NEXT:    s_and_b64 s[6:7], s[8:9], s[12:13]
; GCN-NEXT:    v_cndmask_b32_e64 v17, v7, v17, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e64 v6, v6, v16, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
; GCN-NEXT:    v_cndmask_b32_e64 v7, 0, v13, s[6:7]
; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
; GCN-NEXT:    v_cndmask_b32_e32 v1, v18, v1, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v0, v19, v0, vcc
; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[14:15]
; GCN-NEXT:    v_cndmask_b32_e32 v5, v17, v5, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, v12, s[6:7]
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = lshr <2 x i128> %lhs, %rhs
  ret <2 x i128> %shl
}

define <2 x i128> @v_ashr_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; GCN-LABEL: v_ashr_v2i128_vv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshr_b64 v[16:17], v[0:1], v8
; GCN-NEXT:    v_sub_i32_e32 v18, vcc, 64, v8
; GCN-NEXT:    v_lshl_b64 v[18:19], v[2:3], v18
; GCN-NEXT:    v_or_b32_e32 v20, v17, v19
; GCN-NEXT:    v_or_b32_e32 v21, v16, v18
; GCN-NEXT:    v_sub_i32_e32 v16, vcc, 64, v12
; GCN-NEXT:    v_lshl_b64 v[16:17], v[6:7], v16
; GCN-NEXT:    v_lshr_b64 v[18:19], v[4:5], v12
; GCN-NEXT:    v_or_b32_e32 v19, v19, v17
; GCN-NEXT:    v_or_b32_e32 v18, v18, v16
; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[10:11]
; GCN-NEXT:    v_or_b32_e32 v11, v9, v11
; GCN-NEXT:    v_or_b32_e32 v10, v8, v10
; GCN-NEXT:    v_cmp_eq_u64_e64 s[8:9], 0, v[14:15]
; GCN-NEXT:    v_or_b32_e32 v15, v13, v15
; GCN-NEXT:    v_or_b32_e32 v14, v12, v14
; GCN-NEXT:    v_cmp_gt_u64_e64 s[10:11], 64, v[8:9]
; GCN-NEXT:    v_subrev_i32_e32 v9, vcc, 64, v8
; GCN-NEXT:    v_ashr_i64 v[16:17], v[2:3], v9
; GCN-NEXT:    s_and_b64 s[6:7], s[6:7], s[10:11]
; GCN-NEXT:    v_cndmask_b32_e64 v17, v17, v20, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e64 v16, v16, v21, s[6:7]
; GCN-NEXT:    v_cmp_gt_u64_e64 s[10:11], 64, v[12:13]
; GCN-NEXT:    v_ashr_i64 v[8:9], v[2:3], v8
; GCN-NEXT:    v_ashrrev_i32_e32 v20, 31, v3
; GCN-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v12
; GCN-NEXT:    v_ashr_i64 v[12:13], v[6:7], v12
; GCN-NEXT:    v_ashrrev_i32_e32 v21, 31, v7
; GCN-NEXT:    v_ashr_i64 v[2:3], v[6:7], v2
; GCN-NEXT:    s_and_b64 vcc, s[8:9], s[10:11]
; GCN-NEXT:    v_cndmask_b32_e32 v6, v3, v19, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v18, v2, v18, vcc
; GCN-NEXT:    v_cndmask_b32_e64 v3, v20, v9, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e64 v2, v20, v8, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e32 v7, v21, v13, vcc
; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[10:11]
; GCN-NEXT:    v_cndmask_b32_e64 v1, v17, v1, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e64 v0, v16, v0, s[6:7]
; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[14:15]
; GCN-NEXT:    v_cndmask_b32_e64 v5, v6, v5, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e64 v4, v18, v4, s[6:7]
; GCN-NEXT:    v_cndmask_b32_e32 v6, v21, v12, vcc
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = ashr <2 x i128> %lhs, %rhs
  ret <2 x i128> %shl
}

; ---------------------------------------------------------------------------
; amdgpu_kernel entry points operating on <2 x i128> kernel arguments; the
; result is stored through a null addrspace(1) pointer.
; NOTE(review): the symbol below is spelled without the underscore before
; "ss" that its sibling kernels use; kept as-is because the label assertion
; and any external references depend on the exact name.
; ---------------------------------------------------------------------------

define amdgpu_kernel void @s_shl_v2i128ss(<2 x i128> %lhs, <2 x i128> %rhs) {
; GCN-LABEL: s_shl_v2i128ss:
; GCN:       .amd_kernel_code_t
; GCN-NEXT:    amd_code_version_major = 1
; GCN-NEXT:    amd_code_version_minor = 2
; GCN-NEXT:    amd_machine_kind = 1
; GCN-NEXT:    amd_machine_version_major = 7
; GCN-NEXT:    amd_machine_version_minor = 0
; GCN-NEXT:    amd_machine_version_stepping = 0
; GCN-NEXT:    kernel_code_entry_byte_offset = 256
; GCN-NEXT:    kernel_code_prefetch_byte_size = 0
; GCN-NEXT:    granulated_workitem_vgpr_count = 3
; GCN-NEXT:    granulated_wavefront_sgpr_count = 4
; GCN-NEXT:    priority = 0
; GCN-NEXT:    float_mode = 192
; GCN-NEXT:    priv = 0
; GCN-NEXT:    enable_dx10_clamp = 1
; GCN-NEXT:    debug_mode = 0
; GCN-NEXT:    enable_ieee_mode = 1
; GCN-NEXT:    enable_sgpr_private_segment_wave_byte_offset = 0
; GCN-NEXT:    user_sgpr_count = 6
; GCN-NEXT:    enable_trap_handler = 0
; GCN-NEXT:    enable_sgpr_workgroup_id_x = 1
; GCN-NEXT:    enable_sgpr_workgroup_id_y = 0
; GCN-NEXT:    enable_sgpr_workgroup_id_z = 0
; GCN-NEXT:    enable_sgpr_workgroup_info = 0
; GCN-NEXT:    enable_vgpr_workitem_id = 0
; GCN-NEXT:    enable_exception_msb = 0
; GCN-NEXT:    granulated_lds_size = 0
; GCN-NEXT:    enable_exception = 0
; GCN-NEXT:    enable_sgpr_private_segment_buffer = 1
; GCN-NEXT:    enable_sgpr_dispatch_ptr = 0
; GCN-NEXT:    enable_sgpr_queue_ptr = 0
; GCN-NEXT:    enable_sgpr_kernarg_segment_ptr = 1
; GCN-NEXT:    enable_sgpr_dispatch_id = 0
; GCN-NEXT:    enable_sgpr_flat_scratch_init = 0
; GCN-NEXT:    enable_sgpr_private_segment_size = 0
; GCN-NEXT:    enable_sgpr_grid_workgroup_count_x = 0
; GCN-NEXT:    enable_sgpr_grid_workgroup_count_y = 0
; GCN-NEXT:    enable_sgpr_grid_workgroup_count_z = 0
; GCN-NEXT:    enable_ordered_append_gds = 0
; GCN-NEXT:    private_element_size = 1
; GCN-NEXT:    is_ptr64 = 1
; GCN-NEXT:    is_dynamic_callstack = 0
; GCN-NEXT:    is_debug_enabled = 0
; GCN-NEXT:    is_xnack_enabled = 0
; GCN-NEXT:    workitem_private_segment_byte_size = 0
; GCN-NEXT:    workgroup_group_segment_byte_size = 0
; GCN-NEXT:    gds_segment_byte_size = 0
; GCN-NEXT:    kernarg_segment_byte_size = 64
; GCN-NEXT:    workgroup_fbarrier_count = 0
; GCN-NEXT:    wavefront_sgpr_count = 36
; GCN-NEXT:    workitem_vgpr_count = 16
; GCN-NEXT:    reserved_vgpr_first = 0
; GCN-NEXT:    reserved_vgpr_count = 0
; GCN-NEXT:    reserved_sgpr_first = 0
; GCN-NEXT:    reserved_sgpr_count = 0
; GCN-NEXT:    debug_wavefront_private_segment_offset_sgpr = 0
; GCN-NEXT:    debug_private_segment_buffer_sgpr = 0
; GCN-NEXT:    kernarg_segment_alignment = 5
; GCN-NEXT:    group_segment_alignment = 4
; GCN-NEXT:    private_segment_alignment = 4
; GCN-NEXT:    wavefront_size = 6
; GCN-NEXT:    call_convention = -1
; GCN-NEXT:    runtime_loader_kernel_symbol = 0
; GCN-NEXT:    .end_amd_kernel_code_t
; GCN-NEXT:  ; %bb.0:
; GCN-NEXT:    s_load_dwordx8 s[12:19], s[4:5], 0x8
; GCN-NEXT:    s_load_dwordx8 s[4:11], s[4:5], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[12:13], 64
; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[14:15], 0
; GCN-NEXT:    s_lshl_b64 s[20:21], s[4:5], s12
; GCN-NEXT:    s_lshl_b64 s[22:23], s[6:7], s12
; GCN-NEXT:    s_sub_i32 s30, 64, s12
; GCN-NEXT:    s_sub_i32 s31, s12, 64
; GCN-NEXT:    s_sub_i32 s32, 64, s16
; GCN-NEXT:    s_sub_i32 s33, s16, 64
; GCN-NEXT:    s_or_b64 s[12:13], s[12:13], s[14:15]
; GCN-NEXT:    v_cmp_lt_u64_e64 s[14:15], s[16:17], 64
; GCN-NEXT:    v_cmp_eq_u64_e64 s[24:25], s[18:19], 0
; GCN-NEXT:    s_lshl_b64 s[26:27], s[8:9], s16
; GCN-NEXT:    s_lshl_b64 s[28:29], s[10:11], s16
; GCN-NEXT:    s_or_b64 s[16:17], s[16:17], s[18:19]
; GCN-NEXT:    v_mov_b32_e32 v0, s21
; GCN-NEXT:    v_mov_b32_e32 v2, s20
; GCN-NEXT:    s_and_b64 vcc, s[2:3], s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v8, 0
; GCN-NEXT:    v_mov_b32_e32 v9, 0
; GCN-NEXT:    v_mov_b32_e32 v10, 16
; GCN-NEXT:    v_mov_b32_e32 v11, 0
; GCN-NEXT:    v_mov_b32_e32 v3, s7
; GCN-NEXT:    v_mov_b32_e32 v6, s6
; GCN-NEXT:    v_mov_b32_e32 v7, s11
; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
; GCN-NEXT:    v_mov_b32_e32 v0, s27
; GCN-NEXT:    s_and_b64 s[0:1], s[24:25], s[14:15]
; GCN-NEXT:    s_lshr_b64 s[2:3], s[4:5], s30
; GCN-NEXT:    s_lshl_b64 s[4:5], s[4:5], s31
; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, v0, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v4, s26
; GCN-NEXT:    s_lshr_b64 s[6:7], s[8:9], s32
; GCN-NEXT:    s_lshl_b64 s[8:9], s[8:9], s33
; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
; GCN-NEXT:    s_or_b64 s[2:3], s[22:23], s[2:3]
; GCN-NEXT:    v_mov_b32_e32 v2, s5
; GCN-NEXT:    v_mov_b32_e32 v12, s4
; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[0:1]
; GCN-NEXT:    s_or_b64 s[4:5], s[28:29], s[6:7]
; GCN-NEXT:    v_mov_b32_e32 v13, s9
; GCN-NEXT:    v_mov_b32_e32 v14, s8
; GCN-NEXT:    v_mov_b32_e32 v15, s3
; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v15, vcc
; GCN-NEXT:    v_mov_b32_e32 v15, s2
; GCN-NEXT:    v_cndmask_b32_e32 v12, v12, v15, vcc
; GCN-NEXT:    v_mov_b32_e32 v15, s5
; GCN-NEXT:    v_cndmask_b32_e64 v13, v13, v15, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v15, s4
; GCN-NEXT:    v_cndmask_b32_e64 v14, v14, v15, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v15, s10
; GCN-NEXT:    v_cmp_eq_u64_e64 vcc, s[12:13], 0
; GCN-NEXT:    v_cndmask_b32_e32 v3, v2, v3, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v2, v12, v6, vcc
; GCN-NEXT:    v_cmp_eq_u64_e64 vcc, s[16:17], 0
; GCN-NEXT:    v_cndmask_b32_e32 v7, v13, v7, vcc
; GCN-NEXT:    v_cndmask_b32_e32 v6, v14, v15, vcc
; GCN-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
; GCN-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
; GCN-NEXT:    s_endpgm
  %shift = shl <2 x i128> %lhs, %rhs
  store <2 x i128> %shift, <2 x i128> addrspace(1)* null
  ret void
}

; NOTE(review): the definition that starts on the next line continues past the
; reviewed span, so its text is left exactly as found (fused extraction line
; numbers included) and still needs the same line-by-line cleanup.
773define amdgpu_kernel void @s_lshr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) { 774; GCN-LABEL: s_lshr_v2i128_ss: 
775; GCN: .amd_kernel_code_t 776; GCN-NEXT: amd_code_version_major = 1 777; GCN-NEXT: amd_code_version_minor = 2 778; GCN-NEXT: amd_machine_kind = 1 779; GCN-NEXT: amd_machine_version_major = 7 780; GCN-NEXT: amd_machine_version_minor = 0 781; GCN-NEXT: amd_machine_version_stepping = 0 782; GCN-NEXT: kernel_code_entry_byte_offset = 256 783; GCN-NEXT: kernel_code_prefetch_byte_size = 0 784; GCN-NEXT: granulated_workitem_vgpr_count = 4 785; GCN-NEXT: granulated_wavefront_sgpr_count = 4 786; GCN-NEXT: priority = 0 787; GCN-NEXT: float_mode = 192 788; GCN-NEXT: priv = 0 789; GCN-NEXT: enable_dx10_clamp = 1 790; GCN-NEXT: debug_mode = 0 791; GCN-NEXT: enable_ieee_mode = 1 792; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 793; GCN-NEXT: user_sgpr_count = 6 794; GCN-NEXT: enable_trap_handler = 0 795; GCN-NEXT: enable_sgpr_workgroup_id_x = 1 796; GCN-NEXT: enable_sgpr_workgroup_id_y = 0 797; GCN-NEXT: enable_sgpr_workgroup_id_z = 0 798; GCN-NEXT: enable_sgpr_workgroup_info = 0 799; GCN-NEXT: enable_vgpr_workitem_id = 0 800; GCN-NEXT: enable_exception_msb = 0 801; GCN-NEXT: granulated_lds_size = 0 802; GCN-NEXT: enable_exception = 0 803; GCN-NEXT: enable_sgpr_private_segment_buffer = 1 804; GCN-NEXT: enable_sgpr_dispatch_ptr = 0 805; GCN-NEXT: enable_sgpr_queue_ptr = 0 806; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1 807; GCN-NEXT: enable_sgpr_dispatch_id = 0 808; GCN-NEXT: enable_sgpr_flat_scratch_init = 0 809; GCN-NEXT: enable_sgpr_private_segment_size = 0 810; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0 811; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0 812; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0 813; GCN-NEXT: enable_ordered_append_gds = 0 814; GCN-NEXT: private_element_size = 1 815; GCN-NEXT: is_ptr64 = 1 816; GCN-NEXT: is_dynamic_callstack = 0 817; GCN-NEXT: is_debug_enabled = 0 818; GCN-NEXT: is_xnack_enabled = 0 819; GCN-NEXT: workitem_private_segment_byte_size = 0 820; GCN-NEXT: workgroup_group_segment_byte_size = 0 821; GCN-NEXT: 
gds_segment_byte_size = 0 822; GCN-NEXT: kernarg_segment_byte_size = 64 823; GCN-NEXT: workgroup_fbarrier_count = 0 824; GCN-NEXT: wavefront_sgpr_count = 36 825; GCN-NEXT: workitem_vgpr_count = 17 826; GCN-NEXT: reserved_vgpr_first = 0 827; GCN-NEXT: reserved_vgpr_count = 0 828; GCN-NEXT: reserved_sgpr_first = 0 829; GCN-NEXT: reserved_sgpr_count = 0 830; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 831; GCN-NEXT: debug_private_segment_buffer_sgpr = 0 832; GCN-NEXT: kernarg_segment_alignment = 5 833; GCN-NEXT: group_segment_alignment = 4 834; GCN-NEXT: private_segment_alignment = 4 835; GCN-NEXT: wavefront_size = 6 836; GCN-NEXT: call_convention = -1 837; GCN-NEXT: runtime_loader_kernel_symbol = 0 838; GCN-NEXT: .end_amd_kernel_code_t 839; GCN-NEXT: ; %bb.0: 840; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x8 841; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x0 842; GCN-NEXT: s_waitcnt lgkmcnt(0) 843; GCN-NEXT: v_cmp_lt_u64_e64 s[16:17], s[8:9], 64 844; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[10:11], 0 845; GCN-NEXT: s_lshr_b64 s[20:21], s[2:3], s8 846; GCN-NEXT: s_lshr_b64 s[22:23], s[0:1], s8 847; GCN-NEXT: s_sub_i32 s30, 64, s8 848; GCN-NEXT: s_sub_i32 s31, s8, 64 849; GCN-NEXT: s_sub_i32 s32, 64, s12 850; GCN-NEXT: s_sub_i32 s33, s12, 64 851; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 852; GCN-NEXT: v_cmp_lt_u64_e64 s[10:11], s[12:13], 64 853; GCN-NEXT: v_cmp_eq_u64_e64 s[24:25], s[14:15], 0 854; GCN-NEXT: s_lshr_b64 s[26:27], s[6:7], s12 855; GCN-NEXT: s_lshr_b64 s[28:29], s[4:5], s12 856; GCN-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] 857; GCN-NEXT: v_mov_b32_e32 v0, s21 858; GCN-NEXT: v_mov_b32_e32 v1, s20 859; GCN-NEXT: s_and_b64 vcc, s[18:19], s[16:17] 860; GCN-NEXT: v_mov_b32_e32 v8, 0 861; GCN-NEXT: v_mov_b32_e32 v9, 0 862; GCN-NEXT: v_mov_b32_e32 v10, 16 863; GCN-NEXT: v_mov_b32_e32 v11, 0 864; GCN-NEXT: v_mov_b32_e32 v4, s1 865; GCN-NEXT: v_mov_b32_e32 v5, s0 866; GCN-NEXT: v_mov_b32_e32 v12, s5 867; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v0, vcc 
868; GCN-NEXT: v_mov_b32_e32 v0, s27
869; GCN-NEXT: s_and_b64 s[0:1], s[24:25], s[10:11]
870; GCN-NEXT: s_lshl_b64 s[10:11], s[2:3], s30
871; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], s31
872; GCN-NEXT: v_cndmask_b32_e64 v7, 0, v0, s[0:1]
873; GCN-NEXT: v_mov_b32_e32 v0, s26
874; GCN-NEXT: s_lshl_b64 s[14:15], s[6:7], s32
875; GCN-NEXT: s_lshr_b64 s[6:7], s[6:7], s33
876; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc
877; GCN-NEXT: s_or_b64 s[10:11], s[22:23], s[10:11]
878; GCN-NEXT: v_mov_b32_e32 v1, s3
879; GCN-NEXT: v_mov_b32_e32 v13, s2
880; GCN-NEXT: v_cndmask_b32_e64 v6, 0, v0, s[0:1]
881; GCN-NEXT: s_or_b64 s[2:3], s[28:29], s[14:15]
882; GCN-NEXT: v_mov_b32_e32 v0, s7
883; GCN-NEXT: v_mov_b32_e32 v14, s6
884; GCN-NEXT: v_mov_b32_e32 v15, s11
885; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
886; GCN-NEXT: v_mov_b32_e32 v15, s10
887; GCN-NEXT: v_cndmask_b32_e32 v13, v13, v15, vcc
888; GCN-NEXT: v_mov_b32_e32 v15, s3
889; GCN-NEXT: v_cndmask_b32_e64 v15, v0, v15, s[0:1]
890; GCN-NEXT: v_mov_b32_e32 v0, s2
891; GCN-NEXT: v_cndmask_b32_e64 v14, v14, v0, s[0:1]
892; GCN-NEXT: v_mov_b32_e32 v16, s4
893; GCN-NEXT: v_cmp_eq_u64_e64 vcc, s[8:9], 0
894; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
895; GCN-NEXT: v_cndmask_b32_e32 v0, v13, v5, vcc
896; GCN-NEXT: v_cmp_eq_u64_e64 vcc, s[12:13], 0
897; GCN-NEXT: v_cndmask_b32_e32 v5, v15, v12, vcc
898; GCN-NEXT: v_cndmask_b32_e32 v4, v14, v16, vcc
899; GCN-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
900; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7]
901; GCN-NEXT: s_endpgm
902 %shift = lshr <2 x i128> %lhs, %rhs
903 store <2 x i128> %shift, <2 x i128> addrspace(1)* null
904 ret void
905}
906
; Arithmetic right shift of a <2 x i128> vector by a <2 x i128> amount, with
; both operands passed as amdgpu_kernel arguments. The result is stored through
; a null addrspace(1) pointer (presumably only so that the shift is not
; optimized away -- confirm before relying on it).
;
; The expectation lines that follow are machine generated: the first line of
; this file states they come from utils/update_llc_test_checks.py. Regenerate
; them with that script instead of editing them by hand.
;
; What the expected output pins down, read straight from the lines below:
;   * kernarg_segment_byte_size is 64 and the arguments are fetched with two
;     s_load_dwordx8 from s[4:5] (offsets 0x0 into s[0:7], 0x8 into s[8:15]).
;   * Each 128-bit lane is built from 64-bit scalar shifts (s_ashr_i64,
;     s_lshr_b64, s_lshl_b64) using the amounts in s8 / s12 and the derived
;     values "64 - amount" and "amount - 64".
;   * v_cndmask_b32 picks between those pieces based on two tests per lane:
;     low half of the amount below 64 with high half equal to 0, and the whole
;     amount equal to 0.
;   * The fill value for the upper half comes from s_ashr_i32 <reg>, 31
;     (s30 for the first lane, s14 for the second).
;   * The two lanes are written with flat_store_dwordx4 to the addresses held
;     in v[8:9] = 0 and v[10:11] = 16.
907define amdgpu_kernel void @s_ashr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) {
908; GCN-LABEL: s_ashr_v2i128_ss:
909; GCN: .amd_kernel_code_t
910; GCN-NEXT: amd_code_version_major = 1
911; GCN-NEXT: amd_code_version_minor = 2
912; GCN-NEXT: amd_machine_kind = 1
913; GCN-NEXT: amd_machine_version_major = 7
914; GCN-NEXT: amd_machine_version_minor = 0
915; GCN-NEXT: amd_machine_version_stepping = 0
916; GCN-NEXT: kernel_code_entry_byte_offset = 256
917; GCN-NEXT: kernel_code_prefetch_byte_size = 0
918; GCN-NEXT: granulated_workitem_vgpr_count = 4
919; GCN-NEXT: granulated_wavefront_sgpr_count = 4
920; GCN-NEXT: priority = 0
921; GCN-NEXT: float_mode = 192
922; GCN-NEXT: priv = 0
923; GCN-NEXT: enable_dx10_clamp = 1
924; GCN-NEXT: debug_mode = 0
925; GCN-NEXT: enable_ieee_mode = 1
926; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0
927; GCN-NEXT: user_sgpr_count = 6
928; GCN-NEXT: enable_trap_handler = 0
929; GCN-NEXT: enable_sgpr_workgroup_id_x = 1
930; GCN-NEXT: enable_sgpr_workgroup_id_y = 0
931; GCN-NEXT: enable_sgpr_workgroup_id_z = 0
932; GCN-NEXT: enable_sgpr_workgroup_info = 0
933; GCN-NEXT: enable_vgpr_workitem_id = 0
934; GCN-NEXT: enable_exception_msb = 0
935; GCN-NEXT: granulated_lds_size = 0
936; GCN-NEXT: enable_exception = 0
937; GCN-NEXT: enable_sgpr_private_segment_buffer = 1
938; GCN-NEXT: enable_sgpr_dispatch_ptr = 0
939; GCN-NEXT: enable_sgpr_queue_ptr = 0
940; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1
941; GCN-NEXT: enable_sgpr_dispatch_id = 0
942; GCN-NEXT: enable_sgpr_flat_scratch_init = 0
943; GCN-NEXT: enable_sgpr_private_segment_size = 0
944; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0
945; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0
946; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0
947; GCN-NEXT: enable_ordered_append_gds = 0
948; GCN-NEXT: private_element_size = 1
949; GCN-NEXT: is_ptr64 = 1
950; GCN-NEXT: is_dynamic_callstack = 0
951; GCN-NEXT: is_debug_enabled = 0
952; GCN-NEXT: is_xnack_enabled = 0
953; GCN-NEXT: workitem_private_segment_byte_size = 0
954; GCN-NEXT: workgroup_group_segment_byte_size = 0
955; GCN-NEXT: gds_segment_byte_size = 0
956; GCN-NEXT: kernarg_segment_byte_size = 64
957; GCN-NEXT: workgroup_fbarrier_count = 0
958; GCN-NEXT: wavefront_sgpr_count = 37
959; GCN-NEXT: workitem_vgpr_count = 17
960; GCN-NEXT: reserved_vgpr_first = 0
961; GCN-NEXT: reserved_vgpr_count = 0
962; GCN-NEXT: reserved_sgpr_first = 0
963; GCN-NEXT: reserved_sgpr_count = 0
964; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
965; GCN-NEXT: debug_private_segment_buffer_sgpr = 0
966; GCN-NEXT: kernarg_segment_alignment = 5
967; GCN-NEXT: group_segment_alignment = 4
968; GCN-NEXT: private_segment_alignment = 4
969; GCN-NEXT: wavefront_size = 6
970; GCN-NEXT: call_convention = -1
971; GCN-NEXT: runtime_loader_kernel_symbol = 0
972; GCN-NEXT: .end_amd_kernel_code_t
973; GCN-NEXT: ; %bb.0:
974; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x8
975; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x0
976; GCN-NEXT: s_waitcnt lgkmcnt(0)
977; GCN-NEXT: v_cmp_lt_u64_e64 s[16:17], s[8:9], 64
978; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[10:11], 0
979; GCN-NEXT: s_ashr_i64 s[20:21], s[2:3], s8
980; GCN-NEXT: s_ashr_i32 s30, s3, 31
981; GCN-NEXT: s_lshr_b64 s[22:23], s[0:1], s8
982; GCN-NEXT: s_sub_i32 s31, 64, s8
983; GCN-NEXT: s_sub_i32 s32, s8, 64
984; GCN-NEXT: s_sub_i32 s33, 64, s12
985; GCN-NEXT: s_sub_i32 s34, s12, 64
986; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
987; GCN-NEXT: v_cmp_lt_u64_e64 s[10:11], s[12:13], 64
988; GCN-NEXT: v_cmp_eq_u64_e64 s[24:25], s[14:15], 0
989; GCN-NEXT: s_ashr_i64 s[26:27], s[6:7], s12
990; GCN-NEXT: s_lshr_b64 s[28:29], s[4:5], s12
991; GCN-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15]
992; GCN-NEXT: v_mov_b32_e32 v0, s21
993; GCN-NEXT: v_mov_b32_e32 v1, s20
994; GCN-NEXT: v_mov_b32_e32 v2, s30
995; GCN-NEXT: s_ashr_i32 s14, s7, 31
996; GCN-NEXT: s_and_b64 vcc, s[18:19], s[16:17]
997; GCN-NEXT: v_mov_b32_e32 v4, s14
998; GCN-NEXT: v_mov_b32_e32 v8, 0
999; GCN-NEXT: v_mov_b32_e32 v9, 0
1000; GCN-NEXT: v_mov_b32_e32 v10, 16
1001; GCN-NEXT: v_mov_b32_e32 v11, 0
1002; GCN-NEXT: v_mov_b32_e32 v5, s1
1003; GCN-NEXT: v_mov_b32_e32 v12, s0
1004; GCN-NEXT: v_mov_b32_e32 v13, s5
1005; GCN-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
1006; GCN-NEXT: v_mov_b32_e32 v0, s27
1007; GCN-NEXT: s_and_b64 s[0:1], s[24:25], s[10:11]
1008; GCN-NEXT: s_lshl_b64 s[10:11], s[2:3], s31
1009; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], s32
1010; GCN-NEXT: v_cndmask_b32_e64 v7, v4, v0, s[0:1]
1011; GCN-NEXT: v_mov_b32_e32 v0, s26
1012; GCN-NEXT: s_lshl_b64 s[14:15], s[6:7], s33
1013; GCN-NEXT: s_ashr_i64 s[6:7], s[6:7], s34
1014; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v1, vcc
1015; GCN-NEXT: s_or_b64 s[10:11], s[22:23], s[10:11]
1016; GCN-NEXT: v_mov_b32_e32 v1, s3
1017; GCN-NEXT: v_mov_b32_e32 v14, s2
1018; GCN-NEXT: v_cndmask_b32_e64 v6, v4, v0, s[0:1]
1019; GCN-NEXT: s_or_b64 s[2:3], s[28:29], s[14:15]
1020; GCN-NEXT: v_mov_b32_e32 v0, s7
1021; GCN-NEXT: v_mov_b32_e32 v4, s6
1022; GCN-NEXT: v_mov_b32_e32 v15, s11
1023; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc
1024; GCN-NEXT: v_mov_b32_e32 v15, s10
1025; GCN-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
1026; GCN-NEXT: v_mov_b32_e32 v15, s3
1027; GCN-NEXT: v_cndmask_b32_e64 v15, v0, v15, s[0:1]
1028; GCN-NEXT: v_mov_b32_e32 v0, s2
1029; GCN-NEXT: v_cndmask_b32_e64 v4, v4, v0, s[0:1]
1030; GCN-NEXT: v_mov_b32_e32 v16, s4
1031; GCN-NEXT: v_cmp_eq_u64_e64 vcc, s[8:9], 0
1032; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
1033; GCN-NEXT: v_cndmask_b32_e32 v0, v14, v12, vcc
1034; GCN-NEXT: v_cmp_eq_u64_e64 vcc, s[12:13], 0
1035; GCN-NEXT: v_cndmask_b32_e32 v5, v15, v13, vcc
1036; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v16, vcc
1037; GCN-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
1038; GCN-NEXT: flat_store_dwordx4 v[10:11], v[4:7]
1039; GCN-NEXT: s_endpgm
; IR under test: a single vector ashr whose result is stored to the null
; addrspace(1) pointer; the kernel returns nothing.
1040 %shift = ashr <2 x i128> %lhs, %rhs
1041 store <2 x i128> %shift, <2 x i128> addrspace(1)* null
1042 ret void
1043}
1044
1045