1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s 3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s 4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s 5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s 6 7define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) { 8; GFX6-LABEL: s_fshr_i7: 9; GFX6: ; %bb.0: 10; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 11; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 12; GFX6-NEXT: s_movk_i32 s3, 0x7f 13; GFX6-NEXT: s_and_b32 s2, s2, s3 14; GFX6-NEXT: s_lshl_b32 s0, s0, 1 15; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 16; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 17; GFX6-NEXT: s_and_b32 s1, s1, s3 18; GFX6-NEXT: v_mul_lo_u32 v1, -7, v0 19; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 20; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 21; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 22; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7 23; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 24; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0 25; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 26; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 27; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0 28; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 29; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 30; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0 31; GFX6-NEXT: v_and_b32_e32 v0, s3, v0 32; GFX6-NEXT: v_and_b32_e32 v1, s3, v1 33; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 34; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0 35; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 36; GFX6-NEXT: v_readfirstlane_b32 s0, v0 37; GFX6-NEXT: ; return to shader part epilog 38; 39; GFX8-LABEL: s_fshr_i7: 40; GFX8: ; %bb.0: 41; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 42; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 43; GFX8-NEXT: s_movk_i32 s3, 0x7f 44; 
GFX8-NEXT: s_and_b32 s2, s2, s3 45; GFX8-NEXT: s_lshl_b32 s0, s0, 1 46; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 47; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 48; GFX8-NEXT: s_and_b32 s1, s1, s3 49; GFX8-NEXT: v_mul_lo_u32 v1, -7, v0 50; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 51; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 52; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0 53; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7 54; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0 55; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0 56; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 57; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 58; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0 59; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 60; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 61; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0 62; GFX8-NEXT: v_and_b32_e32 v0, s3, v0 63; GFX8-NEXT: v_and_b32_e32 v1, s3, v1 64; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0 65; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s1 66; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 67; GFX8-NEXT: v_readfirstlane_b32 s0, v0 68; GFX8-NEXT: ; return to shader part epilog 69; 70; GFX9-LABEL: s_fshr_i7: 71; GFX9: ; %bb.0: 72; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 73; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 74; GFX9-NEXT: s_movk_i32 s3, 0x7f 75; GFX9-NEXT: s_and_b32 s2, s2, s3 76; GFX9-NEXT: s_lshl_b32 s0, s0, 1 77; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 78; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 79; GFX9-NEXT: s_and_b32 s1, s1, s3 80; GFX9-NEXT: v_mul_lo_u32 v1, -7, v0 81; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 82; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 83; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 84; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7 85; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0 86; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0 87; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 88; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 89; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0 90; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 91; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 92; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0 93; GFX9-NEXT: v_and_b32_e32 v0, s3, v0 94; 
GFX9-NEXT: v_and_b32_e32 v1, s3, v1 95; GFX9-NEXT: v_lshlrev_b16_e64 v1, v1, s0 96; GFX9-NEXT: v_lshrrev_b16_e64 v0, v0, s1 97; GFX9-NEXT: v_or_b32_e32 v0, v1, v0 98; GFX9-NEXT: v_readfirstlane_b32 s0, v0 99; GFX9-NEXT: ; return to shader part epilog 100; 101; GFX10-LABEL: s_fshr_i7: 102; GFX10: ; %bb.0: 103; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 104; GFX10-NEXT: s_movk_i32 s3, 0x7f 105; GFX10-NEXT: s_lshl_b32 s0, s0, 1 106; GFX10-NEXT: s_and_b32 s2, s2, s3 107; GFX10-NEXT: s_and_b32 s1, s1, s3 108; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 109; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 110; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 111; GFX10-NEXT: v_mul_lo_u32 v1, -7, v0 112; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 113; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 114; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0 115; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7 116; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0 117; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 118; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 119; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 120; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 121; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 122; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 123; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0 124; GFX10-NEXT: v_and_b32_e32 v0, s3, v0 125; GFX10-NEXT: v_and_b32_e32 v1, s3, v1 126; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1 127; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0 128; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 129; GFX10-NEXT: v_readfirstlane_b32 s0, v0 130; GFX10-NEXT: ; return to shader part epilog 131 %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt) 132 ret i7 %result 133} 134 135define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) { 136; GFX6-LABEL: v_fshr_i7: 137; GFX6: ; %bb.0: 138; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 139; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 140; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 141; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2 142; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 143; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 
144; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 145; GFX6-NEXT: v_mul_lo_u32 v4, -7, v3 146; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4 147; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 148; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 149; GFX6-NEXT: v_mov_b32_e32 v4, 0x7f 150; GFX6-NEXT: v_and_b32_e32 v1, v1, v4 151; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7 152; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 153; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2 154; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 155; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 156; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2 157; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 158; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 159; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2 160; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 161; GFX6-NEXT: v_and_b32_e32 v3, v3, v4 162; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0 163; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 164; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 165; GFX6-NEXT: s_setpc_b64 s[30:31] 166; 167; GFX8-LABEL: v_fshr_i7: 168; GFX8: ; %bb.0: 169; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 170; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 171; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3 172; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2 173; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 174; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 175; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 176; GFX8-NEXT: v_mul_lo_u32 v4, -7, v3 177; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4 178; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4 179; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3 180; GFX8-NEXT: v_mov_b32_e32 v4, 0x7f 181; GFX8-NEXT: v_and_b32_e32 v1, v1, v4 182; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7 183; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 184; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2 185; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 186; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 187; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2 188; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 189; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 190; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2 191; GFX8-NEXT: 
v_and_b32_e32 v2, v2, v4 192; GFX8-NEXT: v_and_b32_e32 v3, v3, v4 193; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0 194; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 195; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 196; GFX8-NEXT: s_setpc_b64 s[30:31] 197; 198; GFX9-LABEL: v_fshr_i7: 199; GFX9: ; %bb.0: 200; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 201; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 202; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 203; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2 204; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 205; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 206; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 207; GFX9-NEXT: v_mul_lo_u32 v4, -7, v3 208; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 209; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 210; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 211; GFX9-NEXT: v_mov_b32_e32 v4, 0x7f 212; GFX9-NEXT: v_and_b32_e32 v1, v1, v4 213; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7 214; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 215; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2 216; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 217; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 218; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2 219; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 220; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 221; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2 222; GFX9-NEXT: v_and_b32_e32 v2, v2, v4 223; GFX9-NEXT: v_and_b32_e32 v3, v3, v4 224; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0 225; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1 226; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 227; GFX9-NEXT: s_setpc_b64 s[30:31] 228; 229; GFX10-LABEL: v_fshr_i7: 230; GFX10: ; %bb.0: 231; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 232; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 233; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 234; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2 235; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 236; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1 237; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 238; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 239; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 240; GFX10-NEXT: v_mul_lo_u32 v4, -7, v3 241; 
GFX10-NEXT: v_mul_hi_u32 v4, v3, v4 242; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 243; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3 244; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7 245; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3 246; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2 247; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 248; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 249; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2 250; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 251; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 252; GFX10-NEXT: v_mov_b32_e32 v3, 0x7f 253; GFX10-NEXT: v_sub_nc_u16 v4, 6, v2 254; GFX10-NEXT: v_and_b32_e32 v2, v2, v3 255; GFX10-NEXT: v_and_b32_e32 v4, v4, v3 256; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 257; GFX10-NEXT: v_lshlrev_b16 v0, v4, v0 258; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 259; GFX10-NEXT: s_setpc_b64 s[30:31] 260 %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt) 261 ret i7 %result 262} 263 264define amdgpu_ps i8 @s_fshr_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) { 265; GFX6-LABEL: s_fshr_i8: 266; GFX6: ; %bb.0: 267; GFX6-NEXT: s_and_b32 s3, s2, 7 268; GFX6-NEXT: s_andn2_b32 s2, 7, s2 269; GFX6-NEXT: s_lshl_b32 s0, s0, 1 270; GFX6-NEXT: s_and_b32 s1, s1, 0xff 271; GFX6-NEXT: s_lshl_b32 s0, s0, s2 272; GFX6-NEXT: s_lshr_b32 s1, s1, s3 273; GFX6-NEXT: s_or_b32 s0, s0, s1 274; GFX6-NEXT: ; return to shader part epilog 275; 276; GFX8-LABEL: s_fshr_i8: 277; GFX8: ; %bb.0: 278; GFX8-NEXT: s_and_b32 s1, s1, 0xff 279; GFX8-NEXT: s_and_b32 s3, s2, 7 280; GFX8-NEXT: s_andn2_b32 s2, 7, s2 281; GFX8-NEXT: s_lshl_b32 s0, s0, 1 282; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 283; GFX8-NEXT: s_lshl_b32 s0, s0, s2 284; GFX8-NEXT: s_lshr_b32 s1, s1, s3 285; GFX8-NEXT: s_or_b32 s0, s0, s1 286; GFX8-NEXT: ; return to shader part epilog 287; 288; GFX9-LABEL: s_fshr_i8: 289; GFX9: ; %bb.0: 290; GFX9-NEXT: s_and_b32 s1, s1, 0xff 291; GFX9-NEXT: s_and_b32 s3, s2, 7 292; GFX9-NEXT: s_andn2_b32 s2, 7, s2 293; GFX9-NEXT: s_lshl_b32 s0, s0, 1 294; GFX9-NEXT: s_bfe_u32 s1, 
s1, 0x100000 295; GFX9-NEXT: s_lshl_b32 s0, s0, s2 296; GFX9-NEXT: s_lshr_b32 s1, s1, s3 297; GFX9-NEXT: s_or_b32 s0, s0, s1 298; GFX9-NEXT: ; return to shader part epilog 299; 300; GFX10-LABEL: s_fshr_i8: 301; GFX10: ; %bb.0: 302; GFX10-NEXT: s_and_b32 s1, s1, 0xff 303; GFX10-NEXT: s_and_b32 s3, s2, 7 304; GFX10-NEXT: s_andn2_b32 s2, 7, s2 305; GFX10-NEXT: s_lshl_b32 s0, s0, 1 306; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 307; GFX10-NEXT: s_lshl_b32 s0, s0, s2 308; GFX10-NEXT: s_lshr_b32 s1, s1, s3 309; GFX10-NEXT: s_or_b32 s0, s0, s1 310; GFX10-NEXT: ; return to shader part epilog 311 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt) 312 ret i8 %result 313} 314 315define i8 @v_fshr_i8(i8 %lhs, i8 %rhs, i8 %amt) { 316; GFX6-LABEL: v_fshr_i8: 317; GFX6: ; %bb.0: 318; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 319; GFX6-NEXT: v_and_b32_e32 v3, 7, v2 320; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 321; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 322; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 323; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 324; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 325; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1 326; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 327; GFX6-NEXT: s_setpc_b64 s[30:31] 328; 329; GFX8-LABEL: v_fshr_i8: 330; GFX8: ; %bb.0: 331; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 332; GFX8-NEXT: v_and_b32_e32 v3, 7, v2 333; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 334; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 335; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 336; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 337; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 338; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 339; GFX8-NEXT: s_setpc_b64 s[30:31] 340; 341; GFX9-LABEL: v_fshr_i8: 342; GFX9: ; %bb.0: 343; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 344; GFX9-NEXT: v_and_b32_e32 v3, 7, v2 345; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 346; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 347; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 348; 
GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 349; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 350; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 351; GFX9-NEXT: s_setpc_b64 s[30:31] 352; 353; GFX10-LABEL: v_fshr_i8: 354; GFX10: ; %bb.0: 355; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 356; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 357; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 358; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 359; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 360; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 361; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 362; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 363; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0 364; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 365; GFX10-NEXT: s_setpc_b64 s[30:31] 366 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt) 367 ret i8 %result 368} 369 370define amdgpu_ps i8 @s_fshr_i8_4(i8 inreg %lhs, i8 inreg %rhs) { 371; GFX6-LABEL: s_fshr_i8_4: 372; GFX6: ; %bb.0: 373; GFX6-NEXT: s_and_b32 s1, s1, 0xff 374; GFX6-NEXT: s_lshl_b32 s0, s0, 4 375; GFX6-NEXT: s_lshr_b32 s1, s1, 4 376; GFX6-NEXT: s_or_b32 s0, s0, s1 377; GFX6-NEXT: ; return to shader part epilog 378; 379; GFX8-LABEL: s_fshr_i8_4: 380; GFX8: ; %bb.0: 381; GFX8-NEXT: s_and_b32 s1, s1, 0xff 382; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 383; GFX8-NEXT: s_lshl_b32 s0, s0, 4 384; GFX8-NEXT: s_lshr_b32 s1, s1, 4 385; GFX8-NEXT: s_or_b32 s0, s0, s1 386; GFX8-NEXT: ; return to shader part epilog 387; 388; GFX9-LABEL: s_fshr_i8_4: 389; GFX9: ; %bb.0: 390; GFX9-NEXT: s_and_b32 s1, s1, 0xff 391; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 392; GFX9-NEXT: s_lshl_b32 s0, s0, 4 393; GFX9-NEXT: s_lshr_b32 s1, s1, 4 394; GFX9-NEXT: s_or_b32 s0, s0, s1 395; GFX9-NEXT: ; return to shader part epilog 396; 397; GFX10-LABEL: s_fshr_i8_4: 398; GFX10: ; %bb.0: 399; GFX10-NEXT: s_and_b32 s1, s1, 0xff 400; GFX10-NEXT: s_lshl_b32 s0, s0, 4 401; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 402; GFX10-NEXT: s_lshr_b32 s1, s1, 4 403; GFX10-NEXT: s_or_b32 s0, 
s0, s1 404; GFX10-NEXT: ; return to shader part epilog 405 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4) 406 ret i8 %result 407} 408 409define i8 @v_fshr_i8_4(i8 %lhs, i8 %rhs) { 410; GFX6-LABEL: v_fshr_i8_4: 411; GFX6: ; %bb.0: 412; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 413; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 414; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0 415; GFX6-NEXT: v_lshrrev_b32_e32 v1, 4, v1 416; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 417; GFX6-NEXT: s_setpc_b64 s[30:31] 418; 419; GFX8-LABEL: v_fshr_i8_4: 420; GFX8: ; %bb.0: 421; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 422; GFX8-NEXT: v_mov_b32_e32 v2, 4 423; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0 424; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 425; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 426; GFX8-NEXT: s_setpc_b64 s[30:31] 427; 428; GFX9-LABEL: v_fshr_i8_4: 429; GFX9: ; %bb.0: 430; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 431; GFX9-NEXT: s_mov_b32 s4, 4 432; GFX9-NEXT: v_lshlrev_b16_e32 v0, 4, v0 433; GFX9-NEXT: v_lshrrev_b16_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 434; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 435; GFX9-NEXT: s_setpc_b64 s[30:31] 436; 437; GFX10-LABEL: v_fshr_i8_4: 438; GFX10: ; %bb.0: 439; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 440; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 441; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 442; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0 443; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 444; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 445; GFX10-NEXT: s_setpc_b64 s[30:31] 446 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4) 447 ret i8 %result 448} 449 450define amdgpu_ps i8 @s_fshr_i8_5(i8 inreg %lhs, i8 inreg %rhs) { 451; GFX6-LABEL: s_fshr_i8_5: 452; GFX6: ; %bb.0: 453; GFX6-NEXT: s_and_b32 s1, s1, 0xff 454; GFX6-NEXT: s_lshl_b32 s0, s0, 3 455; GFX6-NEXT: s_lshr_b32 s1, s1, 5 456; GFX6-NEXT: s_or_b32 s0, s0, s1 457; GFX6-NEXT: ; 
return to shader part epilog 458; 459; GFX8-LABEL: s_fshr_i8_5: 460; GFX8: ; %bb.0: 461; GFX8-NEXT: s_and_b32 s1, s1, 0xff 462; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 463; GFX8-NEXT: s_lshl_b32 s0, s0, 3 464; GFX8-NEXT: s_lshr_b32 s1, s1, 5 465; GFX8-NEXT: s_or_b32 s0, s0, s1 466; GFX8-NEXT: ; return to shader part epilog 467; 468; GFX9-LABEL: s_fshr_i8_5: 469; GFX9: ; %bb.0: 470; GFX9-NEXT: s_and_b32 s1, s1, 0xff 471; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 472; GFX9-NEXT: s_lshl_b32 s0, s0, 3 473; GFX9-NEXT: s_lshr_b32 s1, s1, 5 474; GFX9-NEXT: s_or_b32 s0, s0, s1 475; GFX9-NEXT: ; return to shader part epilog 476; 477; GFX10-LABEL: s_fshr_i8_5: 478; GFX10: ; %bb.0: 479; GFX10-NEXT: s_and_b32 s1, s1, 0xff 480; GFX10-NEXT: s_lshl_b32 s0, s0, 3 481; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 482; GFX10-NEXT: s_lshr_b32 s1, s1, 5 483; GFX10-NEXT: s_or_b32 s0, s0, s1 484; GFX10-NEXT: ; return to shader part epilog 485 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5) 486 ret i8 %result 487} 488 489define i8 @v_fshr_i8_5(i8 %lhs, i8 %rhs) { 490; GFX6-LABEL: v_fshr_i8_5: 491; GFX6: ; %bb.0: 492; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 493; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 494; GFX6-NEXT: v_lshlrev_b32_e32 v0, 3, v0 495; GFX6-NEXT: v_lshrrev_b32_e32 v1, 5, v1 496; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 497; GFX6-NEXT: s_setpc_b64 s[30:31] 498; 499; GFX8-LABEL: v_fshr_i8_5: 500; GFX8: ; %bb.0: 501; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 502; GFX8-NEXT: v_mov_b32_e32 v2, 5 503; GFX8-NEXT: v_lshlrev_b16_e32 v0, 3, v0 504; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 505; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 506; GFX8-NEXT: s_setpc_b64 s[30:31] 507; 508; GFX9-LABEL: v_fshr_i8_5: 509; GFX9: ; %bb.0: 510; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 511; GFX9-NEXT: v_mov_b32_e32 v2, 5 512; GFX9-NEXT: v_lshlrev_b16_e32 v0, 3, v0 513; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 514; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 515; GFX9-NEXT: s_setpc_b64 s[30:31] 516; 517; GFX10-LABEL: v_fshr_i8_5: 518; GFX10: ; %bb.0: 519; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 520; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 521; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 522; GFX10-NEXT: v_lshlrev_b16 v0, 3, v0 523; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1 524; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 525; GFX10-NEXT: s_setpc_b64 s[30:31] 526 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5) 527 ret i8 %result 528} 529 530define amdgpu_ps i16 @s_fshr_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) { 531; GFX6-LABEL: s_fshr_v2i8: 532; GFX6: ; %bb.0: 533; GFX6-NEXT: s_lshr_b32 s3, s0, 8 534; GFX6-NEXT: s_lshr_b32 s4, s2, 8 535; GFX6-NEXT: s_and_b32 s5, s2, 7 536; GFX6-NEXT: s_andn2_b32 s2, 7, s2 537; GFX6-NEXT: s_movk_i32 s6, 0xff 538; GFX6-NEXT: s_lshl_b32 s0, s0, 1 539; GFX6-NEXT: s_lshl_b32 s0, s0, s2 540; GFX6-NEXT: s_and_b32 s2, s1, s6 541; GFX6-NEXT: s_lshr_b32 s2, s2, s5 542; GFX6-NEXT: s_or_b32 s0, s0, s2 543; GFX6-NEXT: s_and_b32 s2, s4, 7 544; GFX6-NEXT: s_andn2_b32 s4, 7, s4 545; GFX6-NEXT: s_lshl_b32 s3, s3, 1 546; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80008 547; GFX6-NEXT: s_lshl_b32 s3, s3, s4 548; GFX6-NEXT: s_lshr_b32 s1, s1, s2 549; GFX6-NEXT: s_or_b32 s1, s3, s1 550; GFX6-NEXT: s_and_b32 s1, s1, s6 551; GFX6-NEXT: s_and_b32 s0, s0, s6 552; GFX6-NEXT: s_lshl_b32 s1, s1, 8 553; GFX6-NEXT: s_or_b32 s0, s0, s1 554; GFX6-NEXT: ; return to shader part epilog 555; 556; GFX8-LABEL: s_fshr_v2i8: 557; GFX8: ; %bb.0: 558; GFX8-NEXT: s_lshr_b32 s3, s0, 8 559; GFX8-NEXT: s_lshr_b32 s5, s2, 8 560; GFX8-NEXT: s_and_b32 s6, s2, 7 561; GFX8-NEXT: s_andn2_b32 s2, 7, s2 562; GFX8-NEXT: s_lshl_b32 s0, s0, 1 563; GFX8-NEXT: s_lshl_b32 s0, s0, s2 564; GFX8-NEXT: s_movk_i32 s2, 0xff 565; GFX8-NEXT: s_lshr_b32 s4, s1, 8 566; GFX8-NEXT: s_and_b32 s1, s1, s2 567; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 568; 
GFX8-NEXT: s_lshr_b32 s1, s1, s6 569; GFX8-NEXT: s_and_b32 s4, s4, s2 570; GFX8-NEXT: s_or_b32 s0, s0, s1 571; GFX8-NEXT: s_and_b32 s1, s5, 7 572; GFX8-NEXT: s_andn2_b32 s5, 7, s5 573; GFX8-NEXT: s_lshl_b32 s3, s3, 1 574; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 575; GFX8-NEXT: s_lshl_b32 s3, s3, s5 576; GFX8-NEXT: s_lshr_b32 s1, s4, s1 577; GFX8-NEXT: s_or_b32 s1, s3, s1 578; GFX8-NEXT: s_and_b32 s0, s0, s2 579; GFX8-NEXT: s_and_b32 s1, s1, s2 580; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000 581; GFX8-NEXT: s_lshl_b32 s1, s1, s2 582; GFX8-NEXT: s_or_b32 s0, s0, s1 583; GFX8-NEXT: ; return to shader part epilog 584; 585; GFX9-LABEL: s_fshr_v2i8: 586; GFX9: ; %bb.0: 587; GFX9-NEXT: s_lshr_b32 s3, s0, 8 588; GFX9-NEXT: s_lshr_b32 s5, s2, 8 589; GFX9-NEXT: s_and_b32 s6, s2, 7 590; GFX9-NEXT: s_andn2_b32 s2, 7, s2 591; GFX9-NEXT: s_lshl_b32 s0, s0, 1 592; GFX9-NEXT: s_lshl_b32 s0, s0, s2 593; GFX9-NEXT: s_movk_i32 s2, 0xff 594; GFX9-NEXT: s_lshr_b32 s4, s1, 8 595; GFX9-NEXT: s_and_b32 s1, s1, s2 596; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 597; GFX9-NEXT: s_lshr_b32 s1, s1, s6 598; GFX9-NEXT: s_and_b32 s4, s4, s2 599; GFX9-NEXT: s_or_b32 s0, s0, s1 600; GFX9-NEXT: s_and_b32 s1, s5, 7 601; GFX9-NEXT: s_andn2_b32 s5, 7, s5 602; GFX9-NEXT: s_lshl_b32 s3, s3, 1 603; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000 604; GFX9-NEXT: s_lshl_b32 s3, s3, s5 605; GFX9-NEXT: s_lshr_b32 s1, s4, s1 606; GFX9-NEXT: s_or_b32 s1, s3, s1 607; GFX9-NEXT: s_and_b32 s0, s0, s2 608; GFX9-NEXT: s_and_b32 s1, s1, s2 609; GFX9-NEXT: s_bfe_u32 s2, 8, 0x100000 610; GFX9-NEXT: s_lshl_b32 s1, s1, s2 611; GFX9-NEXT: s_or_b32 s0, s0, s1 612; GFX9-NEXT: ; return to shader part epilog 613; 614; GFX10-LABEL: s_fshr_v2i8: 615; GFX10: ; %bb.0: 616; GFX10-NEXT: s_lshr_b32 s4, s1, 8 617; GFX10-NEXT: s_movk_i32 s7, 0xff 618; GFX10-NEXT: s_lshr_b32 s3, s0, 8 619; GFX10-NEXT: s_lshr_b32 s5, s2, 8 620; GFX10-NEXT: s_and_b32 s6, s2, 7 621; GFX10-NEXT: s_andn2_b32 s2, 7, s2 622; GFX10-NEXT: s_lshl_b32 s0, s0, 1 623; GFX10-NEXT: 
s_and_b32 s4, s4, s7 624; GFX10-NEXT: s_and_b32 s1, s1, s7 625; GFX10-NEXT: s_lshl_b32 s0, s0, s2 626; GFX10-NEXT: s_and_b32 s2, s5, 7 627; GFX10-NEXT: s_andn2_b32 s5, 7, s5 628; GFX10-NEXT: s_lshl_b32 s3, s3, 1 629; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000 630; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 631; GFX10-NEXT: s_lshl_b32 s3, s3, s5 632; GFX10-NEXT: s_lshr_b32 s2, s4, s2 633; GFX10-NEXT: s_lshr_b32 s1, s1, s6 634; GFX10-NEXT: s_or_b32 s2, s3, s2 635; GFX10-NEXT: s_or_b32 s0, s0, s1 636; GFX10-NEXT: s_and_b32 s1, s2, s7 637; GFX10-NEXT: s_bfe_u32 s2, 8, 0x100000 638; GFX10-NEXT: s_and_b32 s0, s0, s7 639; GFX10-NEXT: s_lshl_b32 s1, s1, s2 640; GFX10-NEXT: s_or_b32 s0, s0, s1 641; GFX10-NEXT: ; return to shader part epilog 642 %lhs = bitcast i16 %lhs.arg to <2 x i8> 643 %rhs = bitcast i16 %rhs.arg to <2 x i8> 644 %amt = bitcast i16 %amt.arg to <2 x i8> 645 %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt) 646 %cast.result = bitcast <2 x i8> %result to i16 647 ret i16 %cast.result 648} 649 650define i16 @v_fshr_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) { 651; GFX6-LABEL: v_fshr_v2i8: 652; GFX6: ; %bb.0: 653; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 654; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v2 655; GFX6-NEXT: v_and_b32_e32 v5, 7, v2 656; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 657; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 658; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 659; GFX6-NEXT: s_movk_i32 s4, 0xff 660; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 661; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 662; GFX6-NEXT: v_and_b32_e32 v2, s4, v1 663; GFX6-NEXT: v_lshrrev_b32_e32 v2, v5, v2 664; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 665; GFX6-NEXT: v_and_b32_e32 v2, 7, v4 666; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 667; GFX6-NEXT: v_and_b32_e32 v4, 7, v4 668; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 669; GFX6-NEXT: v_bfe_u32 v1, v1, 8, 8 670; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3 671; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 672; GFX6-NEXT: v_or_b32_e32 
v1, v3, v1 673; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 674; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0 675; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 676; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 677; GFX6-NEXT: s_setpc_b64 s[30:31] 678; 679; GFX8-LABEL: v_fshr_v2i8: 680; GFX8: ; %bb.0: 681; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 682; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 683; GFX8-NEXT: v_and_b32_e32 v6, 7, v2 684; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 685; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 686; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 687; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 688; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 689; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 690; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 691; GFX8-NEXT: v_xor_b32_e32 v2, -1, v5 692; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 693; GFX8-NEXT: v_and_b32_e32 v1, 7, v5 694; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 695; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3 696; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v3 697; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 698; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 699; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1 700; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 701; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 702; GFX8-NEXT: s_setpc_b64 s[30:31] 703; 704; GFX9-LABEL: v_fshr_v2i8: 705; GFX9: ; %bb.0: 706; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 707; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 708; GFX9-NEXT: v_and_b32_e32 v6, 7, v2 709; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 710; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 711; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 712; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 713; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 714; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 715; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 716; GFX9-NEXT: 
v_xor_b32_e32 v2, -1, v5 717; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 718; GFX9-NEXT: v_and_b32_e32 v1, 7, v5 719; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 720; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3 721; GFX9-NEXT: v_lshlrev_b16_e32 v2, v2, v3 722; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 723; GFX9-NEXT: v_or_b32_e32 v1, v2, v1 724; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1 725; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 726; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 727; GFX9-NEXT: s_setpc_b64 s[30:31] 728; 729; GFX10-LABEL: v_fshr_v2i8: 730; GFX10: ; %bb.0: 731; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 732; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 733; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2 734; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0 735; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1 736; GFX10-NEXT: s_movk_i32 s4, 0xff 737; GFX10-NEXT: v_and_b32_e32 v7, 7, v2 738; GFX10-NEXT: v_xor_b32_e32 v6, -1, v3 739; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2 740; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 741; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 742; GFX10-NEXT: v_and_b32_e32 v5, s4, v5 743; GFX10-NEXT: v_and_b32_e32 v6, 7, v6 744; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 745; GFX10-NEXT: v_and_b32_e32 v1, s4, v1 746; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 747; GFX10-NEXT: v_lshrrev_b16 v3, v3, v5 748; GFX10-NEXT: v_lshlrev_b16 v4, v6, v4 749; GFX10-NEXT: v_lshrrev_b16 v1, v7, v1 750; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0 751; GFX10-NEXT: v_or_b32_e32 v2, v4, v3 752; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 753; GFX10-NEXT: v_and_b32_sdwa v1, v2, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 754; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 755; GFX10-NEXT: s_setpc_b64 s[30:31] 756 %lhs = bitcast i16 %lhs.arg to <2 x i8> 757 %rhs = bitcast i16 %rhs.arg to <2 x i8> 758 %amt = bitcast i16 %amt.arg to <2 x 
i8> 759 %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt) 760 %cast.result = bitcast <2 x i8> %result to i16 761 ret i16 %cast.result 762} 763 764define amdgpu_ps i32 @s_fshr_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) { 765; GFX6-LABEL: s_fshr_v4i8: 766; GFX6: ; %bb.0: 767; GFX6-NEXT: s_lshr_b32 s3, s0, 8 768; GFX6-NEXT: s_lshr_b32 s4, s0, 16 769; GFX6-NEXT: s_lshr_b32 s5, s0, 24 770; GFX6-NEXT: s_lshr_b32 s7, s2, 8 771; GFX6-NEXT: s_lshr_b32 s8, s2, 16 772; GFX6-NEXT: s_lshr_b32 s9, s2, 24 773; GFX6-NEXT: s_and_b32 s10, s2, 7 774; GFX6-NEXT: s_andn2_b32 s2, 7, s2 775; GFX6-NEXT: s_movk_i32 s11, 0xff 776; GFX6-NEXT: s_lshl_b32 s0, s0, 1 777; GFX6-NEXT: s_lshl_b32 s0, s0, s2 778; GFX6-NEXT: s_and_b32 s2, s1, s11 779; GFX6-NEXT: s_lshr_b32 s2, s2, s10 780; GFX6-NEXT: s_or_b32 s0, s0, s2 781; GFX6-NEXT: s_and_b32 s2, s7, 7 782; GFX6-NEXT: s_andn2_b32 s7, 7, s7 783; GFX6-NEXT: s_lshl_b32 s3, s3, 1 784; GFX6-NEXT: s_lshl_b32 s3, s3, s7 785; GFX6-NEXT: s_bfe_u32 s7, s1, 0x80008 786; GFX6-NEXT: s_lshr_b32 s2, s7, s2 787; GFX6-NEXT: s_lshr_b32 s6, s1, 24 788; GFX6-NEXT: s_or_b32 s2, s3, s2 789; GFX6-NEXT: s_and_b32 s3, s8, 7 790; GFX6-NEXT: s_andn2_b32 s7, 7, s8 791; GFX6-NEXT: s_lshl_b32 s4, s4, 1 792; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80010 793; GFX6-NEXT: s_lshl_b32 s4, s4, s7 794; GFX6-NEXT: s_lshr_b32 s1, s1, s3 795; GFX6-NEXT: s_or_b32 s1, s4, s1 796; GFX6-NEXT: s_and_b32 s3, s9, 7 797; GFX6-NEXT: s_andn2_b32 s4, 7, s9 798; GFX6-NEXT: s_lshl_b32 s5, s5, 1 799; GFX6-NEXT: s_and_b32 s2, s2, s11 800; GFX6-NEXT: s_lshl_b32 s4, s5, s4 801; GFX6-NEXT: s_lshr_b32 s3, s6, s3 802; GFX6-NEXT: s_and_b32 s0, s0, s11 803; GFX6-NEXT: s_lshl_b32 s2, s2, 8 804; GFX6-NEXT: s_and_b32 s1, s1, s11 805; GFX6-NEXT: s_or_b32 s3, s4, s3 806; GFX6-NEXT: s_or_b32 s0, s0, s2 807; GFX6-NEXT: s_lshl_b32 s1, s1, 16 808; GFX6-NEXT: s_or_b32 s0, s0, s1 809; GFX6-NEXT: s_and_b32 s1, s3, s11 810; GFX6-NEXT: s_lshl_b32 s1, s1, 24 811; GFX6-NEXT: 
s_or_b32 s0, s0, s1 812; GFX6-NEXT: ; return to shader part epilog 813; 814; GFX8-LABEL: s_fshr_v4i8: 815; GFX8: ; %bb.0: 816; GFX8-NEXT: s_movk_i32 s13, 0xff 817; GFX8-NEXT: s_lshr_b32 s3, s0, 8 818; GFX8-NEXT: s_lshr_b32 s4, s0, 16 819; GFX8-NEXT: s_lshr_b32 s5, s0, 24 820; GFX8-NEXT: s_lshr_b32 s6, s1, 8 821; GFX8-NEXT: s_lshr_b32 s7, s1, 16 822; GFX8-NEXT: s_lshr_b32 s8, s1, 24 823; GFX8-NEXT: s_lshr_b32 s9, s2, 8 824; GFX8-NEXT: s_lshr_b32 s10, s2, 16 825; GFX8-NEXT: s_lshr_b32 s11, s2, 24 826; GFX8-NEXT: s_and_b32 s12, s2, 7 827; GFX8-NEXT: s_andn2_b32 s2, 7, s2 828; GFX8-NEXT: s_lshl_b32 s0, s0, 1 829; GFX8-NEXT: s_and_b32 s1, s1, s13 830; GFX8-NEXT: s_lshl_b32 s0, s0, s2 831; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 832; GFX8-NEXT: s_andn2_b32 s2, 7, s9 833; GFX8-NEXT: s_lshl_b32 s3, s3, 1 834; GFX8-NEXT: s_lshr_b32 s1, s1, s12 835; GFX8-NEXT: s_lshl_b32 s2, s3, s2 836; GFX8-NEXT: s_and_b32 s3, s6, s13 837; GFX8-NEXT: s_or_b32 s0, s0, s1 838; GFX8-NEXT: s_and_b32 s1, s9, 7 839; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 840; GFX8-NEXT: s_lshr_b32 s1, s3, s1 841; GFX8-NEXT: s_andn2_b32 s3, 7, s10 842; GFX8-NEXT: s_lshl_b32 s4, s4, 1 843; GFX8-NEXT: s_lshl_b32 s3, s4, s3 844; GFX8-NEXT: s_and_b32 s4, s7, s13 845; GFX8-NEXT: s_or_b32 s1, s2, s1 846; GFX8-NEXT: s_and_b32 s2, s10, 7 847; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 848; GFX8-NEXT: s_lshr_b32 s2, s4, s2 849; GFX8-NEXT: s_and_b32 s1, s1, s13 850; GFX8-NEXT: s_or_b32 s2, s3, s2 851; GFX8-NEXT: s_and_b32 s3, s11, 7 852; GFX8-NEXT: s_andn2_b32 s4, 7, s11 853; GFX8-NEXT: s_lshl_b32 s5, s5, 1 854; GFX8-NEXT: s_and_b32 s0, s0, s13 855; GFX8-NEXT: s_lshl_b32 s1, s1, 8 856; GFX8-NEXT: s_lshl_b32 s4, s5, s4 857; GFX8-NEXT: s_lshr_b32 s3, s8, s3 858; GFX8-NEXT: s_or_b32 s0, s0, s1 859; GFX8-NEXT: s_and_b32 s1, s2, s13 860; GFX8-NEXT: s_or_b32 s3, s4, s3 861; GFX8-NEXT: s_lshl_b32 s1, s1, 16 862; GFX8-NEXT: s_or_b32 s0, s0, s1 863; GFX8-NEXT: s_and_b32 s1, s3, s13 864; GFX8-NEXT: s_lshl_b32 s1, s1, 24 865; GFX8-NEXT: 
s_or_b32 s0, s0, s1 866; GFX8-NEXT: ; return to shader part epilog 867; 868; GFX9-LABEL: s_fshr_v4i8: 869; GFX9: ; %bb.0: 870; GFX9-NEXT: s_movk_i32 s13, 0xff 871; GFX9-NEXT: s_lshr_b32 s3, s0, 8 872; GFX9-NEXT: s_lshr_b32 s4, s0, 16 873; GFX9-NEXT: s_lshr_b32 s5, s0, 24 874; GFX9-NEXT: s_lshr_b32 s6, s1, 8 875; GFX9-NEXT: s_lshr_b32 s7, s1, 16 876; GFX9-NEXT: s_lshr_b32 s8, s1, 24 877; GFX9-NEXT: s_lshr_b32 s9, s2, 8 878; GFX9-NEXT: s_lshr_b32 s10, s2, 16 879; GFX9-NEXT: s_lshr_b32 s11, s2, 24 880; GFX9-NEXT: s_and_b32 s12, s2, 7 881; GFX9-NEXT: s_andn2_b32 s2, 7, s2 882; GFX9-NEXT: s_lshl_b32 s0, s0, 1 883; GFX9-NEXT: s_and_b32 s1, s1, s13 884; GFX9-NEXT: s_lshl_b32 s0, s0, s2 885; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 886; GFX9-NEXT: s_andn2_b32 s2, 7, s9 887; GFX9-NEXT: s_lshl_b32 s3, s3, 1 888; GFX9-NEXT: s_lshr_b32 s1, s1, s12 889; GFX9-NEXT: s_lshl_b32 s2, s3, s2 890; GFX9-NEXT: s_and_b32 s3, s6, s13 891; GFX9-NEXT: s_or_b32 s0, s0, s1 892; GFX9-NEXT: s_and_b32 s1, s9, 7 893; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000 894; GFX9-NEXT: s_lshr_b32 s1, s3, s1 895; GFX9-NEXT: s_andn2_b32 s3, 7, s10 896; GFX9-NEXT: s_lshl_b32 s4, s4, 1 897; GFX9-NEXT: s_lshl_b32 s3, s4, s3 898; GFX9-NEXT: s_and_b32 s4, s7, s13 899; GFX9-NEXT: s_or_b32 s1, s2, s1 900; GFX9-NEXT: s_and_b32 s2, s10, 7 901; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000 902; GFX9-NEXT: s_lshr_b32 s2, s4, s2 903; GFX9-NEXT: s_and_b32 s1, s1, s13 904; GFX9-NEXT: s_or_b32 s2, s3, s2 905; GFX9-NEXT: s_and_b32 s3, s11, 7 906; GFX9-NEXT: s_andn2_b32 s4, 7, s11 907; GFX9-NEXT: s_lshl_b32 s5, s5, 1 908; GFX9-NEXT: s_and_b32 s0, s0, s13 909; GFX9-NEXT: s_lshl_b32 s1, s1, 8 910; GFX9-NEXT: s_lshl_b32 s4, s5, s4 911; GFX9-NEXT: s_lshr_b32 s3, s8, s3 912; GFX9-NEXT: s_or_b32 s0, s0, s1 913; GFX9-NEXT: s_and_b32 s1, s2, s13 914; GFX9-NEXT: s_or_b32 s3, s4, s3 915; GFX9-NEXT: s_lshl_b32 s1, s1, 16 916; GFX9-NEXT: s_or_b32 s0, s0, s1 917; GFX9-NEXT: s_and_b32 s1, s3, s13 918; GFX9-NEXT: s_lshl_b32 s1, s1, 24 919; GFX9-NEXT: 
s_or_b32 s0, s0, s1 920; GFX9-NEXT: ; return to shader part epilog 921; 922; GFX10-LABEL: s_fshr_v4i8: 923; GFX10: ; %bb.0: 924; GFX10-NEXT: s_lshr_b32 s6, s1, 8 925; GFX10-NEXT: s_movk_i32 s13, 0xff 926; GFX10-NEXT: s_lshr_b32 s3, s0, 8 927; GFX10-NEXT: s_lshr_b32 s4, s0, 16 928; GFX10-NEXT: s_lshr_b32 s5, s0, 24 929; GFX10-NEXT: s_lshr_b32 s7, s1, 16 930; GFX10-NEXT: s_lshr_b32 s8, s1, 24 931; GFX10-NEXT: s_lshr_b32 s9, s2, 8 932; GFX10-NEXT: s_lshr_b32 s10, s2, 16 933; GFX10-NEXT: s_lshr_b32 s11, s2, 24 934; GFX10-NEXT: s_and_b32 s12, s2, 7 935; GFX10-NEXT: s_andn2_b32 s2, 7, s2 936; GFX10-NEXT: s_and_b32 s1, s1, s13 937; GFX10-NEXT: s_lshl_b32 s0, s0, 1 938; GFX10-NEXT: s_and_b32 s6, s6, s13 939; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 940; GFX10-NEXT: s_lshl_b32 s0, s0, s2 941; GFX10-NEXT: s_and_b32 s2, s9, 7 942; GFX10-NEXT: s_andn2_b32 s9, 7, s9 943; GFX10-NEXT: s_lshl_b32 s3, s3, 1 944; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 945; GFX10-NEXT: s_lshr_b32 s1, s1, s12 946; GFX10-NEXT: s_lshl_b32 s3, s3, s9 947; GFX10-NEXT: s_lshr_b32 s2, s6, s2 948; GFX10-NEXT: s_and_b32 s6, s7, s13 949; GFX10-NEXT: s_or_b32 s0, s0, s1 950; GFX10-NEXT: s_or_b32 s1, s3, s2 951; GFX10-NEXT: s_and_b32 s2, s10, 7 952; GFX10-NEXT: s_andn2_b32 s3, 7, s10 953; GFX10-NEXT: s_lshl_b32 s4, s4, 1 954; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 955; GFX10-NEXT: s_lshl_b32 s3, s4, s3 956; GFX10-NEXT: s_lshr_b32 s2, s6, s2 957; GFX10-NEXT: s_andn2_b32 s4, 7, s11 958; GFX10-NEXT: s_lshl_b32 s5, s5, 1 959; GFX10-NEXT: s_and_b32 s6, s11, 7 960; GFX10-NEXT: s_lshl_b32 s4, s5, s4 961; GFX10-NEXT: s_lshr_b32 s5, s8, s6 962; GFX10-NEXT: s_or_b32 s2, s3, s2 963; GFX10-NEXT: s_and_b32 s1, s1, s13 964; GFX10-NEXT: s_or_b32 s3, s4, s5 965; GFX10-NEXT: s_and_b32 s0, s0, s13 966; GFX10-NEXT: s_lshl_b32 s1, s1, 8 967; GFX10-NEXT: s_and_b32 s2, s2, s13 968; GFX10-NEXT: s_or_b32 s0, s0, s1 969; GFX10-NEXT: s_lshl_b32 s1, s2, 16 970; GFX10-NEXT: s_and_b32 s2, s3, s13 971; GFX10-NEXT: s_or_b32 s0, s0, s1 972; 
GFX10-NEXT: s_lshl_b32 s1, s2, 24 973; GFX10-NEXT: s_or_b32 s0, s0, s1 974; GFX10-NEXT: ; return to shader part epilog 975 %lhs = bitcast i32 %lhs.arg to <4 x i8> 976 %rhs = bitcast i32 %rhs.arg to <4 x i8> 977 %amt = bitcast i32 %amt.arg to <4 x i8> 978 %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt) 979 %cast.result = bitcast <4 x i8> %result to i32 980 ret i32 %cast.result 981} 982 983define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { 984; GFX6-LABEL: v_fshr_v4i8: 985; GFX6: ; %bb.0: 986; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 987; GFX6-NEXT: v_lshrrev_b32_e32 v7, 8, v2 988; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v2 989; GFX6-NEXT: v_lshrrev_b32_e32 v9, 24, v2 990; GFX6-NEXT: v_and_b32_e32 v10, 7, v2 991; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 992; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 993; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v0 994; GFX6-NEXT: v_lshrrev_b32_e32 v5, 24, v0 995; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 996; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 997; GFX6-NEXT: v_and_b32_e32 v11, 0xff, v1 998; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 999; GFX6-NEXT: v_lshrrev_b32_e32 v10, v10, v11 1000; GFX6-NEXT: v_or_b32_e32 v0, v0, v10 1001; GFX6-NEXT: v_and_b32_e32 v10, 7, v7 1002; GFX6-NEXT: v_xor_b32_e32 v7, -1, v7 1003; GFX6-NEXT: v_and_b32_e32 v7, 7, v7 1004; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 1005; GFX6-NEXT: v_lshlrev_b32_e32 v3, v7, v3 1006; GFX6-NEXT: v_bfe_u32 v7, v1, 8, 8 1007; GFX6-NEXT: v_lshrrev_b32_e32 v7, v10, v7 1008; GFX6-NEXT: v_or_b32_e32 v3, v3, v7 1009; GFX6-NEXT: v_and_b32_e32 v7, 7, v8 1010; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 1011; GFX6-NEXT: v_lshrrev_b32_e32 v6, 24, v1 1012; GFX6-NEXT: v_and_b32_e32 v8, 7, v8 1013; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4 1014; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 1015; GFX6-NEXT: v_mov_b32_e32 v2, 0xff 1016; GFX6-NEXT: v_lshlrev_b32_e32 v4, v8, v4 1017; GFX6-NEXT: v_lshrrev_b32_e32 v1, v7, v1 1018; GFX6-NEXT: v_xor_b32_e32 v7, -1, v9 1019; 
GFX6-NEXT: v_or_b32_e32 v1, v4, v1 1020; GFX6-NEXT: v_and_b32_e32 v4, 7, v9 1021; GFX6-NEXT: v_and_b32_e32 v7, 7, v7 1022; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5 1023; GFX6-NEXT: v_and_b32_e32 v3, v3, v2 1024; GFX6-NEXT: v_lshlrev_b32_e32 v5, v7, v5 1025; GFX6-NEXT: v_lshrrev_b32_e32 v4, v4, v6 1026; GFX6-NEXT: v_and_b32_e32 v0, v0, v2 1027; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 1028; GFX6-NEXT: v_and_b32_e32 v1, v1, v2 1029; GFX6-NEXT: v_or_b32_e32 v4, v5, v4 1030; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 1031; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1032; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1033; GFX6-NEXT: v_and_b32_e32 v1, v4, v2 1034; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 1035; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1036; GFX6-NEXT: s_setpc_b64 s[30:31] 1037; 1038; GFX8-LABEL: v_fshr_v4i8: 1039; GFX8: ; %bb.0: 1040; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1041; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 1042; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2 1043; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v2 1044; GFX8-NEXT: v_and_b32_e32 v8, 7, v2 1045; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 1046; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 1047; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v0 1048; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v9 1049; GFX8-NEXT: v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1050; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1051; GFX8-NEXT: v_or_b32_e32 v2, v2, v8 1052; GFX8-NEXT: v_and_b32_e32 v8, 7, v5 1053; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 1054; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 1055; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 1056; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1057; GFX8-NEXT: v_lshlrev_b16_e32 v3, v5, v3 1058; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1059; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 1060; GFX8-NEXT: v_and_b32_e32 v4, 7, v6 1061; GFX8-NEXT: v_xor_b32_e32 v5, -1, v6 1062; GFX8-NEXT: v_mov_b32_e32 v6, 1 1063; GFX8-NEXT: v_mov_b32_e32 v9, 
0xff 1064; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 1065; GFX8-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1066; GFX8-NEXT: v_lshlrev_b16_e32 v5, v5, v8 1067; GFX8-NEXT: v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1068; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v8 1069; GFX8-NEXT: v_or_b32_e32 v4, v5, v4 1070; GFX8-NEXT: v_and_b32_e32 v5, 7, v7 1071; GFX8-NEXT: v_xor_b32_e32 v7, -1, v7 1072; GFX8-NEXT: v_and_b32_e32 v7, 7, v7 1073; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1074; GFX8-NEXT: v_lshlrev_b16_e32 v0, v7, v0 1075; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1076; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 1077; GFX8-NEXT: v_mov_b32_e32 v1, 8 1078; GFX8-NEXT: s_movk_i32 s4, 0xff 1079; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1080; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1081; GFX8-NEXT: v_and_b32_e32 v2, s4, v4 1082; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1083; GFX8-NEXT: v_and_b32_e32 v0, s4, v0 1084; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 1085; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0 1086; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 1087; GFX8-NEXT: s_setpc_b64 s[30:31] 1088; 1089; GFX9-LABEL: v_fshr_v4i8: 1090; GFX9: ; %bb.0: 1091; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1092; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 1093; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2 1094; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2 1095; GFX9-NEXT: v_and_b32_e32 v8, 7, v2 1096; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 1097; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 1098; GFX9-NEXT: v_lshlrev_b16_e32 v9, 1, v0 1099; GFX9-NEXT: v_lshlrev_b16_e32 v2, v2, v9 1100; GFX9-NEXT: v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:BYTE_0 1101; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1102; GFX9-NEXT: v_or_b32_e32 v2, v2, v8 1103; GFX9-NEXT: v_and_b32_e32 v8, 7, v5 1104; GFX9-NEXT: v_xor_b32_e32 v5, -1, v5 1105; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 1106; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 1107; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1108; GFX9-NEXT: v_lshlrev_b16_e32 v3, v5, v3 1109; GFX9-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1110; GFX9-NEXT: v_or_b32_e32 v3, v3, v4 1111; GFX9-NEXT: v_and_b32_e32 v4, 7, v6 1112; GFX9-NEXT: v_xor_b32_e32 v5, -1, v6 1113; GFX9-NEXT: v_mov_b32_e32 v6, 1 1114; GFX9-NEXT: v_mov_b32_e32 v9, 0xff 1115; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 1116; GFX9-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1117; GFX9-NEXT: v_lshlrev_b16_e32 v5, v5, v8 1118; GFX9-NEXT: v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1119; GFX9-NEXT: v_lshrrev_b16_e32 v4, v4, v8 1120; GFX9-NEXT: v_or_b32_e32 v4, v5, v4 1121; GFX9-NEXT: v_and_b32_e32 v5, 7, v7 1122; GFX9-NEXT: v_xor_b32_e32 v7, -1, v7 1123; GFX9-NEXT: v_and_b32_e32 v7, 7, v7 1124; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1125; GFX9-NEXT: v_lshlrev_b16_e32 v0, v7, v0 1126; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1127; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 1128; GFX9-NEXT: v_mov_b32_e32 v1, 8 1129; GFX9-NEXT: s_movk_i32 s4, 0xff 1130; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1131; GFX9-NEXT: v_and_or_b32 v1, v2, s4, v1 1132; GFX9-NEXT: v_and_b32_e32 v2, s4, v4 1133; GFX9-NEXT: v_and_b32_e32 v0, s4, v0 1134; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1135; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0 1136; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 1137; GFX9-NEXT: s_setpc_b64 
s[30:31] 1138; 1139; GFX10-LABEL: v_fshr_v4i8: 1140; GFX10: ; %bb.0: 1141; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1142; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1143; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v2 1144; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1145; GFX10-NEXT: v_xor_b32_e32 v8, -1, v2 1146; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2 1147; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v2 1148; GFX10-NEXT: v_xor_b32_e32 v11, -1, v6 1149; GFX10-NEXT: v_lshlrev_b16 v3, 1, v3 1150; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0 1151; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 1152; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v1 1153; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 1154; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 1155; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 1156; GFX10-NEXT: v_mov_b32_e32 v13, 0xff 1157; GFX10-NEXT: v_xor_b32_e32 v14, -1, v12 1158; GFX10-NEXT: v_lshlrev_b16 v3, v11, v3 1159; GFX10-NEXT: v_xor_b32_e32 v11, -1, v10 1160; GFX10-NEXT: s_movk_i32 s4, 0xff 1161; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v1 1162; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0 1163; GFX10-NEXT: v_and_b32_e32 v8, s4, v1 1164; GFX10-NEXT: v_and_b32_e32 v6, 7, v6 1165; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 1166; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 1167; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 1168; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 1169; GFX10-NEXT: v_and_b32_sdwa v1, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1170; GFX10-NEXT: v_and_b32_e32 v13, 7, v14 1171; GFX10-NEXT: v_lshlrev_b16 v5, 1, v5 1172; GFX10-NEXT: v_and_b32_e32 v12, 7, v12 1173; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 1174; GFX10-NEXT: v_lshrrev_b16 v6, v6, v7 1175; GFX10-NEXT: v_lshlrev_b16 v4, v11, v4 1176; GFX10-NEXT: v_lshrrev_b16 v1, v10, v1 1177; GFX10-NEXT: v_lshlrev_b16 v5, v13, v5 1178; GFX10-NEXT: v_lshrrev_b16 v7, v12, v9 1179; GFX10-NEXT: v_lshrrev_b16 v2, v2, v8 1180; GFX10-NEXT: v_or_b32_e32 v3, v3, v6 1181; GFX10-NEXT: v_mov_b32_e32 v6, 8 1182; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 1183; 
GFX10-NEXT: v_or_b32_e32 v4, v5, v7 1184; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 1185; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1186; GFX10-NEXT: v_and_b32_e32 v1, s4, v1 1187; GFX10-NEXT: v_and_b32_e32 v3, s4, v4 1188; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v2 1189; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1190; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 1191; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 1192; GFX10-NEXT: s_setpc_b64 s[30:31] 1193 %lhs = bitcast i32 %lhs.arg to <4 x i8> 1194 %rhs = bitcast i32 %rhs.arg to <4 x i8> 1195 %amt = bitcast i32 %amt.arg to <4 x i8> 1196 %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt) 1197 %cast.result = bitcast <4 x i8> %result to i32 1198 ret i32 %cast.result 1199} 1200 1201define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) { 1202; GFX6-LABEL: s_fshr_i24: 1203; GFX6: ; %bb.0: 1204; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1205; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 1206; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1207; GFX6-NEXT: s_mov_b32 s3, 0xffffff 1208; GFX6-NEXT: s_and_b32 s2, s2, s3 1209; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1210; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 1211; GFX6-NEXT: s_lshl_b32 s0, s0, 1 1212; GFX6-NEXT: s_and_b32 s1, s1, s3 1213; GFX6-NEXT: v_mul_lo_u32 v1, v1, v0 1214; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 1215; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 1216; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 1217; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 1218; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 1219; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0 1220; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1221; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1222; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0 1223; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1224; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1225; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0 1226; GFX6-NEXT: v_and_b32_e32 v0, s3, v0 1227; GFX6-NEXT: 
v_and_b32_e32 v1, s3, v1 1228; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 1229; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0 1230; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 1231; GFX6-NEXT: v_readfirstlane_b32 s0, v0 1232; GFX6-NEXT: ; return to shader part epilog 1233; 1234; GFX8-LABEL: s_fshr_i24: 1235; GFX8: ; %bb.0: 1236; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1237; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 1238; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1239; GFX8-NEXT: s_mov_b32 s3, 0xffffff 1240; GFX8-NEXT: s_and_b32 s2, s2, s3 1241; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1242; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 1243; GFX8-NEXT: s_lshl_b32 s0, s0, 1 1244; GFX8-NEXT: s_and_b32 s1, s1, s3 1245; GFX8-NEXT: v_mul_lo_u32 v1, v1, v0 1246; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 1247; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 1248; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0 1249; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 1250; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0 1251; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0 1252; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1253; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1254; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0 1255; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1256; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1257; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0 1258; GFX8-NEXT: v_and_b32_e32 v0, s3, v0 1259; GFX8-NEXT: v_and_b32_e32 v1, s3, v1 1260; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0 1261; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1262; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 1263; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1264; GFX8-NEXT: ; return to shader part epilog 1265; 1266; GFX9-LABEL: s_fshr_i24: 1267; GFX9: ; %bb.0: 1268; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1269; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 1270; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1271; GFX9-NEXT: s_mov_b32 s3, 0xffffff 1272; GFX9-NEXT: s_and_b32 s2, s2, s3 1273; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1274; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 1275; GFX9-NEXT: s_and_b32 s1, s1, s3 1276; 
GFX9-NEXT: s_lshl_b32 s0, s0, 1 1277; GFX9-NEXT: v_mul_lo_u32 v1, v1, v0 1278; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 1279; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 1280; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 1281; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 1282; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0 1283; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0 1284; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1285; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1286; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0 1287; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1288; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1289; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0 1290; GFX9-NEXT: v_and_b32_e32 v0, s3, v0 1291; GFX9-NEXT: v_and_b32_e32 v1, s3, v1 1292; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1293; GFX9-NEXT: v_lshl_or_b32 v0, s0, v1, v0 1294; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1295; GFX9-NEXT: ; return to shader part epilog 1296; 1297; GFX10-LABEL: s_fshr_i24: 1298; GFX10: ; %bb.0: 1299; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1300; GFX10-NEXT: s_mov_b32 s3, 0xffffff 1301; GFX10-NEXT: s_lshl_b32 s0, s0, 1 1302; GFX10-NEXT: s_and_b32 s2, s2, s3 1303; GFX10-NEXT: s_and_b32 s1, s1, s3 1304; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 1305; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1306; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 1307; GFX10-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0 1308; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 1309; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 1310; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0 1311; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 1312; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0 1313; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0 1314; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1315; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1316; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0 1317; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1318; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1319; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0 1320; GFX10-NEXT: v_and_b32_e32 v0, s3, v0 1321; GFX10-NEXT: v_and_b32_e32 v1, s3, v1 1322; 
GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1323; GFX10-NEXT: v_lshl_or_b32 v0, s0, v1, v0 1324; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1325; GFX10-NEXT: ; return to shader part epilog 1326 %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt) 1327 ret i24 %result 1328} 1329 1330define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) { 1331; GFX6-LABEL: v_fshr_i24: 1332; GFX6: ; %bb.0: 1333; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1334; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1335; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 1336; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffffe8 1337; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1338; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1339; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1340; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 1341; GFX6-NEXT: v_mul_lo_u32 v4, v4, v3 1342; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4 1343; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 1344; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 1345; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffff 1346; GFX6-NEXT: v_and_b32_e32 v1, v1, v4 1347; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24 1348; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 1349; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2 1350; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1351; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1352; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2 1353; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1354; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1355; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2 1356; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 1357; GFX6-NEXT: v_and_b32_e32 v3, v3, v4 1358; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0 1359; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1360; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1361; GFX6-NEXT: s_setpc_b64 s[30:31] 1362; 1363; GFX8-LABEL: v_fshr_i24: 1364; GFX8: ; %bb.0: 1365; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1366; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1367; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3 1368; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffffe8 1369; GFX8-NEXT: v_and_b32_e32 v2, 
0xffffff, v2 1370; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1371; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1372; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 1373; GFX8-NEXT: v_mul_lo_u32 v4, v4, v3 1374; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4 1375; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4 1376; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3 1377; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffff 1378; GFX8-NEXT: v_and_b32_e32 v1, v1, v4 1379; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24 1380; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 1381; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2 1382; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1383; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1384; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2 1385; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1386; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1387; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2 1388; GFX8-NEXT: v_and_b32_e32 v2, v2, v4 1389; GFX8-NEXT: v_and_b32_e32 v3, v3, v4 1390; GFX8-NEXT: v_lshlrev_b32_e32 v0, v3, v0 1391; GFX8-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1392; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 1393; GFX8-NEXT: s_setpc_b64 s[30:31] 1394; 1395; GFX9-LABEL: v_fshr_i24: 1396; GFX9: ; %bb.0: 1397; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1398; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1399; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 1400; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffffe8 1401; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1402; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1403; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1404; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 1405; GFX9-NEXT: v_mul_lo_u32 v4, v4, v3 1406; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 1407; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 1408; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 1409; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffff 1410; GFX9-NEXT: v_and_b32_e32 v1, v1, v4 1411; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24 1412; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 1413; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2 1414; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1415; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 
1416; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2 1417; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1418; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1419; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2 1420; GFX9-NEXT: v_and_b32_e32 v2, v2, v4 1421; GFX9-NEXT: v_and_b32_e32 v3, v3, v4 1422; GFX9-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1423; GFX9-NEXT: v_lshl_or_b32 v0, v0, v3, v1 1424; GFX9-NEXT: s_setpc_b64 s[30:31] 1425; 1426; GFX10-LABEL: v_fshr_i24: 1427; GFX10: ; %bb.0: 1428; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1429; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1430; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1431; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1432; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1433; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 1434; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1435; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 1436; GFX10-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3 1437; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4 1438; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 1439; GFX10-NEXT: v_mov_b32_e32 v4, 0xffffff 1440; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3 1441; GFX10-NEXT: v_and_b32_e32 v1, v1, v4 1442; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24 1443; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3 1444; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2 1445; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1446; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1447; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2 1448; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1449; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1450; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v2 1451; GFX10-NEXT: v_and_b32_e32 v2, v2, v4 1452; GFX10-NEXT: v_and_b32_e32 v3, v3, v4 1453; GFX10-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1454; GFX10-NEXT: v_lshl_or_b32 v0, v0, v3, v1 1455; GFX10-NEXT: s_setpc_b64 s[30:31] 1456 %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt) 1457 ret i24 %result 1458} 1459 1460define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) { 1461; GFX6-LABEL: 
s_fshr_v2i24: 1462; GFX6: ; %bb.0: 1463; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1464; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 1465; GFX6-NEXT: s_movk_i32 s9, 0xff 1466; GFX6-NEXT: s_mov_b32 s11, 0x80008 1467; GFX6-NEXT: s_lshr_b32 s6, s0, 16 1468; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1469; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 1470; GFX6-NEXT: s_lshr_b32 s7, s0, 24 1471; GFX6-NEXT: s_lshr_b32 s8, s1, 8 1472; GFX6-NEXT: s_and_b32 s10, s0, s9 1473; GFX6-NEXT: s_bfe_u32 s0, s0, s11 1474; GFX6-NEXT: s_and_b32 s1, s1, s9 1475; GFX6-NEXT: s_lshl_b32 s0, s0, 8 1476; GFX6-NEXT: s_lshl_b32 s1, s1, 8 1477; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1478; GFX6-NEXT: s_or_b32 s0, s10, s0 1479; GFX6-NEXT: s_or_b32 s1, s7, s1 1480; GFX6-NEXT: s_and_b32 s7, s8, s9 1481; GFX6-NEXT: s_lshr_b32 s8, s2, 16 1482; GFX6-NEXT: s_lshr_b32 s10, s2, 24 1483; GFX6-NEXT: s_and_b32 s13, s2, s9 1484; GFX6-NEXT: s_bfe_u32 s2, s2, s11 1485; GFX6-NEXT: v_mul_lo_u32 v2, v1, v0 1486; GFX6-NEXT: s_lshl_b32 s2, s2, 8 1487; GFX6-NEXT: s_and_b32 s8, s8, s9 1488; GFX6-NEXT: s_or_b32 s2, s13, s2 1489; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 1490; GFX6-NEXT: s_lshr_b32 s12, s3, 8 1491; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 1492; GFX6-NEXT: s_lshl_b32 s8, s8, 16 1493; GFX6-NEXT: s_and_b32 s3, s3, s9 1494; GFX6-NEXT: s_or_b32 s2, s2, s8 1495; GFX6-NEXT: s_lshl_b32 s3, s3, 8 1496; GFX6-NEXT: s_and_b32 s8, s12, s9 1497; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 1498; GFX6-NEXT: s_or_b32 s3, s10, s3 1499; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 1500; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000 1501; GFX6-NEXT: s_lshl_b32 s8, s8, 16 1502; GFX6-NEXT: s_or_b32 s3, s3, s8 1503; GFX6-NEXT: s_lshr_b32 s8, s4, 16 1504; GFX6-NEXT: s_lshr_b32 s10, s4, 24 1505; GFX6-NEXT: s_and_b32 s13, s4, s9 1506; GFX6-NEXT: s_bfe_u32 s4, s4, s11 1507; GFX6-NEXT: s_lshl_b32 s4, s4, 8 1508; GFX6-NEXT: s_and_b32 s8, s8, s9 1509; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 1510; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 1511; GFX6-NEXT: s_or_b32 s4, s13, s4 
1512; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 1513; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 1514; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000 1515; GFX6-NEXT: s_lshl_b32 s8, s8, 16 1516; GFX6-NEXT: s_or_b32 s4, s4, s8 1517; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 1518; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 1519; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 1520; GFX6-NEXT: s_lshr_b32 s12, s5, 8 1521; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 1522; GFX6-NEXT: s_and_b32 s5, s5, s9 1523; GFX6-NEXT: v_mul_lo_u32 v1, v1, v2 1524; GFX6-NEXT: s_lshl_b32 s5, s5, 8 1525; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 1526; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 1527; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1528; GFX6-NEXT: v_mul_hi_u32 v1, v2, v1 1529; GFX6-NEXT: s_and_b32 s8, s12, s9 1530; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1531; GFX6-NEXT: s_or_b32 s5, s10, s5 1532; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 1533; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 1534; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000 1535; GFX6-NEXT: s_lshl_b32 s8, s8, 16 1536; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1537; GFX6-NEXT: s_or_b32 s5, s5, s8 1538; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1539; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 1540; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 1541; GFX6-NEXT: s_and_b32 s6, s6, s9 1542; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 1543; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000 1544; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 1545; GFX6-NEXT: s_mov_b32 s8, 0xffffff 1546; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0 1547; GFX6-NEXT: s_lshl_b32 s4, s6, 17 1548; GFX6-NEXT: s_lshl_b32 s0, s0, 1 1549; GFX6-NEXT: s_or_b32 s0, s4, s0 1550; GFX6-NEXT: v_and_b32_e32 v2, s8, v3 1551; GFX6-NEXT: v_and_b32_e32 v0, s8, v0 1552; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 1553; GFX6-NEXT: v_lshr_b32_e32 v0, s2, v0 1554; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 1555; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 1556; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 1557; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1558; GFX6-NEXT: 
v_cndmask_b32_e32 v1, v1, v2, vcc 1559; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 1560; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1561; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 1562; GFX6-NEXT: s_bfe_u32 s7, s7, 0x100000 1563; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1564; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffff 1565; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1 1566; GFX6-NEXT: s_lshl_b32 s0, s7, 17 1567; GFX6-NEXT: s_lshl_b32 s1, s1, 1 1568; GFX6-NEXT: s_or_b32 s0, s0, s1 1569; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 1570; GFX6-NEXT: v_and_b32_e32 v1, v1, v4 1571; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 1572; GFX6-NEXT: v_lshr_b32_e32 v1, s3, v1 1573; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8 1574; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 1575; GFX6-NEXT: v_and_b32_e32 v2, s9, v0 1576; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 1577; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 1578; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 1579; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1580; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 1581; GFX6-NEXT: v_and_b32_e32 v2, s9, v1 1582; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 1583; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 1584; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8 1585; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 1586; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 1587; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 1588; GFX6-NEXT: v_readfirstlane_b32 s0, v0 1589; GFX6-NEXT: v_readfirstlane_b32 s1, v1 1590; GFX6-NEXT: ; return to shader part epilog 1591; 1592; GFX8-LABEL: s_fshr_v2i24: 1593; GFX8: ; %bb.0: 1594; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1595; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 1596; GFX8-NEXT: s_movk_i32 s10, 0xff 1597; GFX8-NEXT: s_lshr_b32 s9, s1, 8 1598; GFX8-NEXT: s_bfe_u32 s11, 8, 0x100000 1599; GFX8-NEXT: s_and_b32 s1, s1, s10 1600; GFX8-NEXT: s_lshr_b32 s6, s0, 8 1601; GFX8-NEXT: s_lshr_b32 s8, s0, 24 1602; GFX8-NEXT: s_lshl_b32 s1, s1, s11 1603; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1604; GFX8-NEXT: s_and_b32 s6, s6, s10 1605; GFX8-NEXT: s_or_b32 s1, s8, s1 1606; GFX8-NEXT: s_lshr_b32 s8, s2, 
8 1607; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 1608; GFX8-NEXT: s_lshr_b32 s7, s0, 16 1609; GFX8-NEXT: s_and_b32 s0, s0, s10 1610; GFX8-NEXT: s_lshl_b32 s6, s6, s11 1611; GFX8-NEXT: s_and_b32 s8, s8, s10 1612; GFX8-NEXT: s_or_b32 s0, s0, s6 1613; GFX8-NEXT: s_and_b32 s6, s7, s10 1614; GFX8-NEXT: s_and_b32 s7, s9, s10 1615; GFX8-NEXT: s_lshr_b32 s9, s2, 16 1616; GFX8-NEXT: s_lshr_b32 s12, s2, 24 1617; GFX8-NEXT: s_and_b32 s2, s2, s10 1618; GFX8-NEXT: s_lshl_b32 s8, s8, s11 1619; GFX8-NEXT: s_or_b32 s2, s2, s8 1620; GFX8-NEXT: s_and_b32 s8, s9, s10 1621; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1622; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000 1623; GFX8-NEXT: v_mul_lo_u32 v2, v1, v0 1624; GFX8-NEXT: s_lshr_b32 s13, s3, 8 1625; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 1626; GFX8-NEXT: s_lshl_b32 s8, s8, 16 1627; GFX8-NEXT: s_and_b32 s3, s3, s10 1628; GFX8-NEXT: s_or_b32 s2, s2, s8 1629; GFX8-NEXT: s_lshl_b32 s3, s3, s11 1630; GFX8-NEXT: s_and_b32 s8, s13, s10 1631; GFX8-NEXT: s_or_b32 s3, s12, s3 1632; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000 1633; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 1634; GFX8-NEXT: s_lshl_b32 s8, s8, 16 1635; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 1636; GFX8-NEXT: s_or_b32 s3, s3, s8 1637; GFX8-NEXT: s_lshr_b32 s8, s4, 8 1638; GFX8-NEXT: s_and_b32 s8, s8, s10 1639; GFX8-NEXT: s_lshr_b32 s9, s4, 16 1640; GFX8-NEXT: s_lshr_b32 s12, s4, 24 1641; GFX8-NEXT: s_and_b32 s4, s4, s10 1642; GFX8-NEXT: s_lshl_b32 s8, s8, s11 1643; GFX8-NEXT: s_or_b32 s4, s4, s8 1644; GFX8-NEXT: s_and_b32 s8, s9, s10 1645; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1646; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 1647; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000 1648; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 1649; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 1650; GFX8-NEXT: s_lshl_b32 s8, s8, 16 1651; GFX8-NEXT: s_or_b32 s4, s4, s8 1652; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0 1653; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 1654; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2 1655; GFX8-NEXT: s_lshr_b32 s13, s5, 8 1656; 
GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 1657; GFX8-NEXT: s_and_b32 s5, s5, s10 1658; GFX8-NEXT: v_mul_lo_u32 v1, v1, v2 1659; GFX8-NEXT: s_lshl_b32 s5, s5, s11 1660; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 1661; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 1662; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1663; GFX8-NEXT: v_mul_hi_u32 v1, v2, v1 1664; GFX8-NEXT: s_and_b32 s8, s13, s10 1665; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1666; GFX8-NEXT: s_or_b32 s5, s12, s5 1667; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000 1668; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 1669; GFX8-NEXT: s_bfe_u32 s5, s5, 0x100000 1670; GFX8-NEXT: s_lshl_b32 s8, s8, 16 1671; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1672; GFX8-NEXT: s_or_b32 s5, s5, s8 1673; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1674; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 1675; GFX8-NEXT: v_mul_hi_u32 v1, s5, v1 1676; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 1677; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000 1678; GFX8-NEXT: s_mov_b32 s8, 0xffffff 1679; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 1680; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v0 1681; GFX8-NEXT: s_lshl_b32 s4, s6, 17 1682; GFX8-NEXT: s_lshl_b32 s0, s0, 1 1683; GFX8-NEXT: s_or_b32 s0, s4, s0 1684; GFX8-NEXT: v_and_b32_e32 v2, s8, v3 1685; GFX8-NEXT: v_and_b32_e32 v0, s8, v0 1686; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 1687; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s2 1688; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s5, v1 1689; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 1690; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 1691; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1692; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1693; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 1694; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1695; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 1696; GFX8-NEXT: s_bfe_u32 s7, s7, 0x100000 1697; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1698; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffff 1699; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1 1700; GFX8-NEXT: s_lshl_b32 s0, s7, 17 1701; GFX8-NEXT: 
s_lshl_b32 s1, s1, 1 1702; GFX8-NEXT: s_or_b32 s0, s0, s1 1703; GFX8-NEXT: v_and_b32_e32 v2, v2, v4 1704; GFX8-NEXT: v_and_b32_e32 v1, v1, v4 1705; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 1706; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s3 1707; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 1708; GFX8-NEXT: v_mov_b32_e32 v2, 8 1709; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1710; GFX8-NEXT: v_mov_b32_e32 v4, 16 1711; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1712; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1713; GFX8-NEXT: v_or_b32_e32 v0, v3, v0 1714; GFX8-NEXT: v_and_b32_e32 v3, s10, v1 1715; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1716; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1717; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 1718; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD 1719; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1720; GFX8-NEXT: v_readfirstlane_b32 s1, v1 1721; GFX8-NEXT: ; return to shader part epilog 1722; 1723; GFX9-LABEL: s_fshr_v2i24: 1724; GFX9: ; %bb.0: 1725; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1726; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 1727; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1728; GFX9-NEXT: s_movk_i32 s12, 0xff 1729; GFX9-NEXT: s_lshr_b32 s11, s1, 8 1730; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1731; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 1732; GFX9-NEXT: s_bfe_u32 s13, 8, 0x100000 1733; GFX9-NEXT: s_and_b32 s1, s1, s12 1734; GFX9-NEXT: s_lshr_b32 s7, s0, 8 1735; GFX9-NEXT: v_mul_lo_u32 v2, v1, v0 1736; GFX9-NEXT: s_lshr_b32 s10, s0, 24 1737; GFX9-NEXT: s_lshl_b32 s1, s1, s13 1738; GFX9-NEXT: s_and_b32 s7, s7, s12 1739; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 1740; GFX9-NEXT: s_or_b32 s1, s10, s1 1741; GFX9-NEXT: s_lshr_b32 s10, s2, 8 1742; GFX9-NEXT: 
s_lshr_b32 s9, s0, 16 1743; GFX9-NEXT: s_and_b32 s0, s0, s12 1744; GFX9-NEXT: s_lshl_b32 s7, s7, s13 1745; GFX9-NEXT: s_and_b32 s10, s10, s12 1746; GFX9-NEXT: s_or_b32 s0, s0, s7 1747; GFX9-NEXT: s_and_b32 s7, s9, s12 1748; GFX9-NEXT: s_and_b32 s9, s11, s12 1749; GFX9-NEXT: s_lshr_b32 s11, s2, 16 1750; GFX9-NEXT: s_lshr_b32 s14, s2, 24 1751; GFX9-NEXT: s_and_b32 s2, s2, s12 1752; GFX9-NEXT: s_lshl_b32 s10, s10, s13 1753; GFX9-NEXT: s_or_b32 s2, s2, s10 1754; GFX9-NEXT: s_and_b32 s10, s11, s12 1755; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 1756; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 1757; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 1758; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 1759; GFX9-NEXT: s_lshr_b32 s15, s3, 8 1760; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 1761; GFX9-NEXT: s_lshl_b32 s10, s10, 16 1762; GFX9-NEXT: s_and_b32 s3, s3, s12 1763; GFX9-NEXT: s_or_b32 s2, s2, s10 1764; GFX9-NEXT: s_lshl_b32 s3, s3, s13 1765; GFX9-NEXT: s_and_b32 s10, s15, s12 1766; GFX9-NEXT: s_or_b32 s3, s14, s3 1767; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 1768; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000 1769; GFX9-NEXT: s_lshl_b32 s10, s10, 16 1770; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 1771; GFX9-NEXT: s_or_b32 s3, s3, s10 1772; GFX9-NEXT: s_lshr_b32 s10, s4, 8 1773; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 1774; GFX9-NEXT: s_and_b32 s10, s10, s12 1775; GFX9-NEXT: s_lshr_b32 s11, s4, 16 1776; GFX9-NEXT: s_lshr_b32 s14, s4, 24 1777; GFX9-NEXT: s_and_b32 s4, s4, s12 1778; GFX9-NEXT: s_lshl_b32 s10, s10, s13 1779; GFX9-NEXT: s_or_b32 s4, s4, s10 1780; GFX9-NEXT: s_and_b32 s10, s11, s12 1781; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 1782; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2 1783; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000 1784; GFX9-NEXT: s_lshl_b32 s10, s10, 16 1785; GFX9-NEXT: s_or_b32 s4, s4, s10 1786; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 1787; GFX9-NEXT: s_lshr_b32 s15, s5, 8 1788; GFX9-NEXT: s_and_b32 s5, s5, s12 1789; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1 1790; GFX9-NEXT: s_lshl_b32 s5, s5, s13 1791; 
GFX9-NEXT: s_and_b32 s10, s15, s12 1792; GFX9-NEXT: s_or_b32 s5, s14, s5 1793; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 1794; GFX9-NEXT: s_bfe_u32 s5, s5, 0x100000 1795; GFX9-NEXT: s_lshl_b32 s10, s10, 16 1796; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 1797; GFX9-NEXT: s_or_b32 s5, s5, s10 1798; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 1799; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 1800; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 1801; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 1802; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1803; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1804; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 1805; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 1806; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1807; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000 1808; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000 1809; GFX9-NEXT: s_mov_b32 s10, 0xffffff 1810; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1811; GFX9-NEXT: v_sub_u32_e32 v3, 23, v0 1812; GFX9-NEXT: s_lshl_b32 s4, s7, 17 1813; GFX9-NEXT: s_lshl_b32 s0, s0, 1 1814; GFX9-NEXT: v_and_b32_e32 v0, s10, v0 1815; GFX9-NEXT: s_or_b32 s0, s4, s0 1816; GFX9-NEXT: v_and_b32_e32 v3, s10, v3 1817; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s2 1818; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1 1819; GFX9-NEXT: v_lshl_or_b32 v0, s0, v3, v0 1820; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 1821; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1822; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1823; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 1824; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1825; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 1826; GFX9-NEXT: s_bfe_u32 s9, s9, 0x100000 1827; GFX9-NEXT: v_mov_b32_e32 v2, 0xffffff 1828; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1829; GFX9-NEXT: v_sub_u32_e32 v3, 23, v1 1830; GFX9-NEXT: s_lshl_b32 s0, s9, 17 1831; GFX9-NEXT: s_lshl_b32 s1, s1, 1 1832; GFX9-NEXT: v_and_b32_e32 v1, v1, v2 1833; GFX9-NEXT: s_or_b32 s0, s0, s1 1834; GFX9-NEXT: v_and_b32_e32 v3, v3, v2 1835; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s3 1836; GFX9-NEXT: s_mov_b32 s6, 8 1837; GFX9-NEXT: 
v_lshl_or_b32 v1, s0, v3, v1 1838; GFX9-NEXT: s_mov_b32 s8, 16 1839; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1840; GFX9-NEXT: v_and_b32_e32 v3, s12, v1 1841; GFX9-NEXT: v_and_or_b32 v2, v0, s12, v2 1842; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1843; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1844; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3 1845; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8 1846; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8 1847; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2 1848; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1849; GFX9-NEXT: v_readfirstlane_b32 s1, v1 1850; GFX9-NEXT: ; return to shader part epilog 1851; 1852; GFX10-LABEL: s_fshr_v2i24: 1853; GFX10: ; %bb.0: 1854; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1855; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 1856; GFX10-NEXT: s_movk_i32 s9, 0xff 1857; GFX10-NEXT: s_lshr_b32 s12, s4, 8 1858; GFX10-NEXT: s_bfe_u32 s10, 8, 0x100000 1859; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 1860; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 1861; GFX10-NEXT: s_lshr_b32 s13, s4, 16 1862; GFX10-NEXT: s_and_b32 s12, s12, s9 1863; GFX10-NEXT: s_lshr_b32 s14, s4, 24 1864; GFX10-NEXT: s_and_b32 s4, s4, s9 1865; GFX10-NEXT: s_lshl_b32 s12, s12, s10 1866; GFX10-NEXT: s_and_b32 s13, s13, s9 1867; GFX10-NEXT: s_or_b32 s4, s4, s12 1868; GFX10-NEXT: s_bfe_u32 s12, s13, 0x100000 1869; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1870; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 1871; GFX10-NEXT: s_lshr_b32 s15, s5, 8 1872; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000 1873; GFX10-NEXT: s_lshl_b32 s12, s12, 16 1874; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 1875; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 1876; GFX10-NEXT: s_and_b32 s5, s5, s9 1877; GFX10-NEXT: s_or_b32 s4, s4, s12 1878; GFX10-NEXT: s_lshl_b32 s5, s5, s10 1879; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 1880; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 1881; GFX10-NEXT: s_and_b32 s12, s15, 
s9 1882; GFX10-NEXT: s_or_b32 s5, s14, s5 1883; GFX10-NEXT: s_bfe_u32 s12, s12, 0x100000 1884; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000 1885; GFX10-NEXT: s_lshl_b32 s12, s12, 16 1886; GFX10-NEXT: s_lshr_b32 s11, s1, 8 1887; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 1888; GFX10-NEXT: s_or_b32 s5, s5, s12 1889; GFX10-NEXT: s_and_b32 s1, s1, s9 1890; GFX10-NEXT: s_lshr_b32 s6, s0, 8 1891; GFX10-NEXT: s_lshr_b32 s8, s0, 24 1892; GFX10-NEXT: s_lshl_b32 s1, s1, s10 1893; GFX10-NEXT: s_and_b32 s6, s6, s9 1894; GFX10-NEXT: s_or_b32 s1, s8, s1 1895; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 1896; GFX10-NEXT: v_mul_hi_u32 v2, v1, v3 1897; GFX10-NEXT: s_lshr_b32 s8, s2, 8 1898; GFX10-NEXT: s_lshr_b32 s7, s0, 16 1899; GFX10-NEXT: s_and_b32 s0, s0, s9 1900; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0 1901; GFX10-NEXT: s_lshl_b32 s6, s6, s10 1902; GFX10-NEXT: s_and_b32 s8, s8, s9 1903; GFX10-NEXT: s_or_b32 s0, s0, s6 1904; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 1905; GFX10-NEXT: s_and_b32 s6, s7, s9 1906; GFX10-NEXT: s_and_b32 s7, s11, s9 1907; GFX10-NEXT: s_lshr_b32 s11, s2, 16 1908; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 1909; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1 1910; GFX10-NEXT: s_lshr_b32 s13, s2, 24 1911; GFX10-NEXT: s_and_b32 s2, s2, s9 1912; GFX10-NEXT: s_lshl_b32 s8, s8, s10 1913; GFX10-NEXT: s_lshr_b32 s12, s3, 8 1914; GFX10-NEXT: s_or_b32 s2, s2, s8 1915; GFX10-NEXT: s_and_b32 s8, s11, s9 1916; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0 1917; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 1918; GFX10-NEXT: s_bfe_u32 s4, s8, 0x100000 1919; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 1920; GFX10-NEXT: s_lshl_b32 s4, s4, 16 1921; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 1922; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1923; GFX10-NEXT: s_and_b32 s3, s3, s9 1924; GFX10-NEXT: s_or_b32 s2, s2, s4 1925; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1 1926; GFX10-NEXT: s_mov_b32 s4, 0xffffff 1927; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 1928; GFX10-NEXT: s_lshl_b32 s3, s3, s10 1929; GFX10-NEXT: s_and_b32 s5, 
s12, s9 1930; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 1931; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 1932; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 1933; GFX10-NEXT: s_or_b32 s3, s13, s3 1934; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000 1935; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000 1936; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo 1937; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1938; GFX10-NEXT: s_lshl_b32 s5, s5, 16 1939; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000 1940; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 1941; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 1942; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 1943; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 1944; GFX10-NEXT: s_or_b32 s3, s3, s5 1945; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 1946; GFX10-NEXT: s_bfe_u32 s7, s7, 0x100000 1947; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 1948; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo 1949; GFX10-NEXT: v_mov_b32_e32 v2, 0xffffff 1950; GFX10-NEXT: v_and_b32_e32 v0, s4, v0 1951; GFX10-NEXT: s_lshl_b32 s5, s6, 17 1952; GFX10-NEXT: v_and_b32_e32 v3, s4, v3 1953; GFX10-NEXT: v_sub_nc_u32_e32 v4, 23, v1 1954; GFX10-NEXT: v_and_b32_e32 v1, v1, v2 1955; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s2 1956; GFX10-NEXT: s_lshl_b32 s0, s0, 1 1957; GFX10-NEXT: s_lshl_b32 s2, s7, 17 1958; GFX10-NEXT: v_and_b32_e32 v2, v4, v2 1959; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s3 1960; GFX10-NEXT: s_or_b32 s0, s5, s0 1961; GFX10-NEXT: s_lshl_b32 s1, s1, 1 1962; GFX10-NEXT: v_lshl_or_b32 v0, s0, v3, v0 1963; GFX10-NEXT: s_or_b32 s0, s2, s1 1964; GFX10-NEXT: v_lshl_or_b32 v1, s0, v2, v1 1965; GFX10-NEXT: s_mov_b32 s0, 8 1966; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1967; GFX10-NEXT: s_mov_b32 s0, 16 1968; GFX10-NEXT: v_and_b32_e32 v3, s9, v1 1969; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8 1970; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8 1971; GFX10-NEXT: v_and_or_b32 v2, v0, s9, v2 1972; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3
; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4
; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3
; GFX10-NEXT: v_readfirstlane_b32 s1, v1
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
  %lhs = bitcast i48 %lhs.arg to <2 x i24>
  %rhs = bitcast i48 %rhs.arg to <2 x i24>
  %amt = bitcast i48 %amt.arg to <2 x i24>
  %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
  %cast.result = bitcast <2 x i24> %result to i48
  ret i48 %cast.result
}

; Funnel-shift-right of <2 x i24> with every operand passed as an ordinary
; (non-inreg) argument under the default calling convention.
; The expected code does not use a single funnel-shift instruction: operands
; are masked with 0xffffff, each shift amount is reduced with a
; reciprocal-multiply sequence (v_rcp_iflag_f32, v_mul_hi_u32, v_mul_lo_u32 by
; 24) followed by two compare-and-subtract steps against 24, and each result
; element is lhs (pre-shifted left by 1) shifted left by 23 minus the reduced
; amount, OR'ed with rhs shifted right by the reduced amount.
define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX6-LABEL: v_fshr_v2i24:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8
; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v9, 24
; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
; GFX6-NEXT: v_mul_lo_u32 v8, v7, v6
; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8
; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; GFX6-NEXT: v_mul_hi_u32 v6, v4, v6
; GFX6-NEXT: v_rcp_iflag_f32_e32 v8, v9
; GFX6-NEXT: v_mov_b32_e32 v9, 0xffffff
; GFX6-NEXT: v_and_b32_e32 v5, v5, v9
; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX6-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GFX6-NEXT: v_cvt_u32_f32_e32 v8, v8
; GFX6-NEXT: v_and_b32_e32 v2, v2, v9
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4
;
GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2017; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2018; GFX6-NEXT: v_mul_lo_u32 v6, v7, v8 2019; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4 2020; GFX6-NEXT: v_and_b32_e32 v7, v7, v9 2021; GFX6-NEXT: v_mul_hi_u32 v6, v8, v6 2022; GFX6-NEXT: v_and_b32_e32 v4, v4, v9 2023; GFX6-NEXT: v_lshlrev_b32_e32 v0, v7, v0 2024; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2025; GFX6-NEXT: v_add_i32_e32 v6, vcc, v8, v6 2026; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 2027; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 2028; GFX6-NEXT: v_and_b32_e32 v3, v3, v9 2029; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 2030; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 2031; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 2032; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2033; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2034; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 2035; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2036; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2037; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2 2038; GFX6-NEXT: v_and_b32_e32 v4, v4, v9 2039; GFX6-NEXT: v_and_b32_e32 v2, v2, v9 2040; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1 2041; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v3 2042; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 2043; GFX6-NEXT: s_setpc_b64 s[30:31] 2044; 2045; GFX8-LABEL: v_fshr_v2i24: 2046; GFX8: ; %bb.0: 2047; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2048; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2049; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6 2050; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8 2051; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2052; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 2053; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2054; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 2055; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2056; GFX8-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2057; GFX8-NEXT: v_mul_lo_u32 v8, v7, v6 2058; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8 2059; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 2060; GFX8-NEXT: v_mul_hi_u32 v6, v4, v6 2061; GFX8-NEXT: v_rcp_iflag_f32_e32 v8, v9 2062; 
GFX8-NEXT: v_mov_b32_e32 v9, 0xffffff 2063; GFX8-NEXT: v_and_b32_e32 v5, v5, v9 2064; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 2065; GFX8-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 2066; GFX8-NEXT: v_cvt_u32_f32_e32 v8, v8 2067; GFX8-NEXT: v_and_b32_e32 v2, v2, v9 2068; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v6 2069; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 2070; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2071; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2072; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 2073; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2074; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2075; GFX8-NEXT: v_mul_lo_u32 v6, v7, v8 2076; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4 2077; GFX8-NEXT: v_and_b32_e32 v7, v7, v9 2078; GFX8-NEXT: v_mul_hi_u32 v6, v8, v6 2079; GFX8-NEXT: v_and_b32_e32 v4, v4, v9 2080; GFX8-NEXT: v_lshlrev_b32_e32 v0, v7, v0 2081; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2082; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6 2083; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 2084; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2085; GFX8-NEXT: v_and_b32_e32 v3, v3, v9 2086; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 2087; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6 2088; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2 2089; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2090; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2091; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2 2092; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2093; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2094; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2 2095; GFX8-NEXT: v_and_b32_e32 v4, v4, v9 2096; GFX8-NEXT: v_and_b32_e32 v2, v2, v9 2097; GFX8-NEXT: v_lshlrev_b32_e32 v1, v4, v1 2098; GFX8-NEXT: v_lshrrev_b32_e32 v2, v2, v3 2099; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 2100; GFX8-NEXT: s_setpc_b64 s[30:31] 2101; 2102; GFX9-LABEL: v_fshr_v2i24: 2103; GFX9: ; %bb.0: 2104; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2105; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2106; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 2107; GFX9-NEXT: v_mov_b32_e32 v7, 
0xffffffe8 2108; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 2109; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9 2110; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2111; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 2112; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2113; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 2114; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2115; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6 2116; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2117; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 2118; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 2119; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v9 2120; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 2121; GFX9-NEXT: v_mov_b32_e32 v9, 0xffffff 2122; GFX9-NEXT: v_and_b32_e32 v5, v5, v9 2123; GFX9-NEXT: v_mul_lo_u32 v7, v7, v8 2124; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 2125; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 2126; GFX9-NEXT: v_and_b32_e32 v3, v3, v9 2127; GFX9-NEXT: v_mul_hi_u32 v7, v8, v7 2128; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 2129; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 2130; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2131; GFX9-NEXT: v_add_u32_e32 v7, v8, v7 2132; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 2133; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2134; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 2135; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2136; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 2137; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2138; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4 2139; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 2140; GFX9-NEXT: v_and_b32_e32 v6, v6, v9 2141; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2142; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2 2143; GFX9-NEXT: v_sub_u32_e32 v2, v5, v7 2144; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2 2145; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2146; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2147; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2 2148; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2149; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2150; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 2151; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 2152; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 
2153; GFX9-NEXT: v_lshrrev_b32_e32 v2, v2, v3 2154; GFX9-NEXT: v_lshl_or_b32 v1, v1, v4, v2 2155; GFX9-NEXT: s_setpc_b64 s[30:31] 2156; 2157; GFX10-LABEL: v_fshr_v2i24: 2158; GFX10: ; %bb.0: 2159; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2160; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2161; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2162; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 2163; GFX10-NEXT: v_mov_b32_e32 v10, 0xffffff 2164; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2165; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2166; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 2167; GFX10-NEXT: v_rcp_iflag_f32_e32 v7, v7 2168; GFX10-NEXT: v_and_b32_e32 v5, v5, v10 2169; GFX10-NEXT: v_and_b32_e32 v2, v2, v10 2170; GFX10-NEXT: v_and_b32_e32 v3, v3, v10 2171; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2172; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2173; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 2174; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 2175; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v7 2176; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 2177; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 2178; GFX10-NEXT: v_mul_hi_u32 v8, v6, v8 2179; GFX10-NEXT: v_mul_hi_u32 v9, v7, v9 2180; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v8 2181; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v9 2182; GFX10-NEXT: v_mul_hi_u32 v6, v4, v6 2183; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7 2184; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 2185; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24 2186; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6 2187; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7 2188; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 2189; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2190; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 2191; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo 2192; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2193; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 2194; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 2195; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2196; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 2197; GFX10-NEXT: 
v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v4
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
; GFX10-NEXT: v_and_b32_e32 v4, v4, v10
; GFX10-NEXT: v_and_b32_e32 v6, v6, v10
; GFX10-NEXT: v_sub_nc_u32_e32 v7, 23, v5
; GFX10-NEXT: v_and_b32_e32 v5, v5, v10
; GFX10-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX10-NEXT: v_and_b32_e32 v4, v7, v10
; GFX10-NEXT: v_lshrrev_b32_e32 v3, v5, v3
; GFX10-NEXT: v_lshl_or_b32 v0, v0, v6, v2
; GFX10-NEXT: v_lshl_or_b32 v1, v1, v4, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
  ret <2 x i24> %result
}

; i32 funnel-shift-right in an amdgpu_ps shader with all three operands marked
; inreg (they appear as s0, s1, s2 in the checks) and an i32 return value.
; Every target is expected to emit one v_alignbit_b32 followed by
; v_readfirstlane_b32 into s0. In the GFX6/GFX8/GFX9 checks both rhs and amt
; are first copied to VGPRs; in the GFX10 checks only amt is copied.
define amdgpu_ps i32 @s_fshr_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
; GFX6-LABEL: s_fshr_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v0, s1
; GFX6-NEXT: v_mov_b32_e32 v1, s2
; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
; GFX6-NEXT: v_readfirstlane_b32 s0, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_fshr_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s1
; GFX8-NEXT: v_mov_b32_e32 v1, s2
; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_fshr_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_fshr_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
  ret i32 %result
}

; Variant of s_fshr_i32 with the shift amount fixed to the constant 5 (only
; lhs and rhs are parameters, both inreg). The checks expect the constant to
; be used directly as the last operand of v_alignbit_b32.
define amdgpu_ps i32 @s_fshr_i32_5(i32 inreg %lhs, i32 inreg %rhs) {
; GFX6-LABEL: s_fshr_i32_5:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v0, s1
; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 5
; GFX6-NEXT: v_readfirstlane_b32 s0, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_fshr_i32_5:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s1
; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 5
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_fshr_i32_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 5
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_fshr_i32_5:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 5
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
  ret i32 %result
}

; Variant of s_fshr_i32 with the shift amount fixed to the constant 8; the
; expected code has the same shape as the constant-5 case with 8 as the last
; v_alignbit_b32 operand.
define amdgpu_ps i32 @s_fshr_i32_8(i32 inreg %lhs, i32 inreg %rhs) {
; GFX6-LABEL: s_fshr_i32_8:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v0, s1
; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 8
; GFX6-NEXT: v_readfirstlane_b32 s0, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_fshr_i32_8:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s1
; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 8
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_fshr_i32_8:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 8
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_fshr_i32_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 8
; GFX10-NEXT:
v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
  ret i32 %result
}

; i32 funnel-shift-right under the default calling convention with all
; operands as ordinary arguments (v0, v1, v2 in the checks). Each target is
; expected to emit exactly one v_alignbit_b32 between the entry s_waitcnt and
; the s_setpc_b64 return; the GFX10 checks additionally expect
; s_waitcnt_vscnt.
define i32 @v_fshr_i32(i32 %lhs, i32 %rhs, i32 %amt) {
; GFX6-LABEL: v_fshr_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fshr_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshr_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fshr_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
  ret i32 %result
}

; Variant of v_fshr_i32 with the shift amount fixed to the constant 5; the
; checks expect the constant as the last operand of v_alignbit_b32.
define i32 @v_fshr_i32_5(i32 %lhs, i32 %rhs) {
; GFX6-LABEL: v_fshr_i32_5:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, 5
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fshr_i32_5:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, 5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshr_i32_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, 5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fshr_i32_5:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:
v_alignbit_b32 v0, v0, v1, 5
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
  ret i32 %result
}

; Variant of v_fshr_i32 with the shift amount fixed to the constant 8; the
; checks expect the constant as the last operand of v_alignbit_b32.
define i32 @v_fshr_i32_8(i32 %lhs, i32 %rhs) {
; GFX6-LABEL: v_fshr_i32_8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, 8
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fshr_i32_8:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, 8
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshr_i32_8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, 8
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fshr_i32_8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, 8
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
  ret i32 %result
}

; Mixed-operand i32 funnel-shift-right in an amdgpu_ps shader: lhs and rhs are
; inreg (s0, s1 in the checks) while amt is an ordinary argument (v0 in the
; checks). The result is bitcast to float for the return, and the expected
; code contains no v_readfirstlane_b32. The GFX6/GFX8/GFX9 checks copy rhs to
; a VGPR first; the GFX10 checks use s0 and s1 directly.
define amdgpu_ps float @v_fshr_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) {
; GFX6-LABEL: v_fshr_i32_ssv:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: v_alignbit_b32 v0, s0, v1, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fshr_i32_ssv:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_alignbit_b32 v0, s0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_fshr_i32_ssv:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_alignbit_b32 v0, s0, v1, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_fshr_i32_ssv:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0
; GFX10-NEXT: ; return to shader part epilog
  %result =
call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
  %cast.result = bitcast i32 %result to float
  ret float %cast.result
}

; Mixed-operand i32 funnel-shift-right in an amdgpu_ps shader: lhs and amt are
; inreg (s0, s1 in the checks) while rhs is an ordinary argument (v0 in the
; checks). The result is bitcast to float for the return. The GFX6/GFX8/GFX9
; checks copy amt to a VGPR first; the GFX10 checks pass s1 directly.
define amdgpu_ps float @v_fshr_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) {
; GFX6-LABEL: v_fshr_i32_svs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fshr_i32_svs:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_fshr_i32_svs:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_fshr_i32_svs:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1
; GFX10-NEXT: ; return to shader part epilog
  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
  %cast.result = bitcast i32 %result to float
  ret float %cast.result
}

; i32 funnel-shift-right in an amdgpu_ps shader returning the result bitcast
; to float. As written, all three operands are inreg, so the checks show the
; same v_mov_b32 / v_alignbit_b32 sequence as s_fshr_i32 without the trailing
; v_readfirstlane_b32.
; NOTE(review): in the sibling _ssv and _svs tests the suffix letters match
; which of lhs/rhs/amt is inreg ("s") versus a plain argument ("v"). By that
; convention "_vss" would make %lhs a plain (non-inreg) argument, but it is
; declared inreg here, so no VGPR-lhs case is exercised. Presumably a
; copy-paste slip -- confirm intent; if the signature is changed, regenerate
; the assertions with utils/update_llc_test_checks.py rather than editing
; them by hand.
define amdgpu_ps float @v_fshr_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
; GFX6-LABEL: v_fshr_i32_vss:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v0, s1
; GFX6-NEXT: v_mov_b32_e32 v1, s2
; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fshr_i32_vss:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s1
; GFX8-NEXT: v_mov_b32_e32 v1, s2
; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_fshr_i32_vss:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_fshr_i32_vss:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32
v0, s2 2480; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0 2481; GFX10-NEXT: ; return to shader part epilog 2482 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 2483 %cast.result = bitcast i32 %result to float 2484 ret float %cast.result 2485} 2486 2487define <2 x i32> @v_fshr_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) { 2488; GFX6-LABEL: v_fshr_v2i32: 2489; GFX6: ; %bb.0: 2490; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2491; GFX6-NEXT: v_alignbit_b32 v0, v0, v2, v4 2492; GFX6-NEXT: v_alignbit_b32 v1, v1, v3, v5 2493; GFX6-NEXT: s_setpc_b64 s[30:31] 2494; 2495; GFX8-LABEL: v_fshr_v2i32: 2496; GFX8: ; %bb.0: 2497; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2498; GFX8-NEXT: v_alignbit_b32 v0, v0, v2, v4 2499; GFX8-NEXT: v_alignbit_b32 v1, v1, v3, v5 2500; GFX8-NEXT: s_setpc_b64 s[30:31] 2501; 2502; GFX9-LABEL: v_fshr_v2i32: 2503; GFX9: ; %bb.0: 2504; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2505; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4 2506; GFX9-NEXT: v_alignbit_b32 v1, v1, v3, v5 2507; GFX9-NEXT: s_setpc_b64 s[30:31] 2508; 2509; GFX10-LABEL: v_fshr_v2i32: 2510; GFX10: ; %bb.0: 2511; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2512; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2513; GFX10-NEXT: v_alignbit_b32 v0, v0, v2, v4 2514; GFX10-NEXT: v_alignbit_b32 v1, v1, v3, v5 2515; GFX10-NEXT: s_setpc_b64 s[30:31] 2516 %result = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) 2517 ret <2 x i32> %result 2518} 2519 2520define <3 x i32> @v_fshr_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) { 2521; GFX6-LABEL: v_fshr_v3i32: 2522; GFX6: ; %bb.0: 2523; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2524; GFX6-NEXT: v_alignbit_b32 v0, v0, v3, v6 2525; GFX6-NEXT: v_alignbit_b32 v1, v1, v4, v7 2526; GFX6-NEXT: v_alignbit_b32 v2, v2, v5, v8 2527; GFX6-NEXT: s_setpc_b64 s[30:31] 2528; 2529; GFX8-LABEL: v_fshr_v3i32: 2530; GFX8: ; %bb.0: 2531; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 2532; GFX8-NEXT: v_alignbit_b32 v0, v0, v3, v6 2533; GFX8-NEXT: v_alignbit_b32 v1, v1, v4, v7 2534; GFX8-NEXT: v_alignbit_b32 v2, v2, v5, v8 2535; GFX8-NEXT: s_setpc_b64 s[30:31] 2536; 2537; GFX9-LABEL: v_fshr_v3i32: 2538; GFX9: ; %bb.0: 2539; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2540; GFX9-NEXT: v_alignbit_b32 v0, v0, v3, v6 2541; GFX9-NEXT: v_alignbit_b32 v1, v1, v4, v7 2542; GFX9-NEXT: v_alignbit_b32 v2, v2, v5, v8 2543; GFX9-NEXT: s_setpc_b64 s[30:31] 2544; 2545; GFX10-LABEL: v_fshr_v3i32: 2546; GFX10: ; %bb.0: 2547; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2548; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2549; GFX10-NEXT: v_alignbit_b32 v0, v0, v3, v6 2550; GFX10-NEXT: v_alignbit_b32 v1, v1, v4, v7 2551; GFX10-NEXT: v_alignbit_b32 v2, v2, v5, v8 2552; GFX10-NEXT: s_setpc_b64 s[30:31] 2553 %result = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) 2554 ret <3 x i32> %result 2555} 2556 2557define <4 x i32> @v_fshr_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) { 2558; GFX6-LABEL: v_fshr_v4i32: 2559; GFX6: ; %bb.0: 2560; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2561; GFX6-NEXT: v_alignbit_b32 v0, v0, v4, v8 2562; GFX6-NEXT: v_alignbit_b32 v1, v1, v5, v9 2563; GFX6-NEXT: v_alignbit_b32 v2, v2, v6, v10 2564; GFX6-NEXT: v_alignbit_b32 v3, v3, v7, v11 2565; GFX6-NEXT: s_setpc_b64 s[30:31] 2566; 2567; GFX8-LABEL: v_fshr_v4i32: 2568; GFX8: ; %bb.0: 2569; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2570; GFX8-NEXT: v_alignbit_b32 v0, v0, v4, v8 2571; GFX8-NEXT: v_alignbit_b32 v1, v1, v5, v9 2572; GFX8-NEXT: v_alignbit_b32 v2, v2, v6, v10 2573; GFX8-NEXT: v_alignbit_b32 v3, v3, v7, v11 2574; GFX8-NEXT: s_setpc_b64 s[30:31] 2575; 2576; GFX9-LABEL: v_fshr_v4i32: 2577; GFX9: ; %bb.0: 2578; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2579; GFX9-NEXT: v_alignbit_b32 v0, v0, v4, v8 2580; GFX9-NEXT: v_alignbit_b32 v1, v1, v5, v9 2581; GFX9-NEXT: v_alignbit_b32 v2, v2, v6, v10 2582; 
GFX9-NEXT: v_alignbit_b32 v3, v3, v7, v11 2583; GFX9-NEXT: s_setpc_b64 s[30:31] 2584; 2585; GFX10-LABEL: v_fshr_v4i32: 2586; GFX10: ; %bb.0: 2587; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2588; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2589; GFX10-NEXT: v_alignbit_b32 v0, v0, v4, v8 2590; GFX10-NEXT: v_alignbit_b32 v1, v1, v5, v9 2591; GFX10-NEXT: v_alignbit_b32 v2, v2, v6, v10 2592; GFX10-NEXT: v_alignbit_b32 v3, v3, v7, v11 2593; GFX10-NEXT: s_setpc_b64 s[30:31] 2594 %result = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) 2595 ret <4 x i32> %result 2596} 2597 2598define amdgpu_ps i16 @s_fshr_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) { 2599; GFX6-LABEL: s_fshr_i16: 2600; GFX6: ; %bb.0: 2601; GFX6-NEXT: s_and_b32 s3, s2, 15 2602; GFX6-NEXT: s_andn2_b32 s2, 15, s2 2603; GFX6-NEXT: s_lshl_b32 s0, s0, 1 2604; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 2605; GFX6-NEXT: s_lshl_b32 s0, s0, s2 2606; GFX6-NEXT: s_bfe_u32 s2, s3, 0x100000 2607; GFX6-NEXT: s_and_b32 s1, s1, 0xffff 2608; GFX6-NEXT: s_lshr_b32 s1, s1, s2 2609; GFX6-NEXT: s_or_b32 s0, s0, s1 2610; GFX6-NEXT: ; return to shader part epilog 2611; 2612; GFX8-LABEL: s_fshr_i16: 2613; GFX8: ; %bb.0: 2614; GFX8-NEXT: s_and_b32 s3, s2, 15 2615; GFX8-NEXT: s_andn2_b32 s2, 15, s2 2616; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000 2617; GFX8-NEXT: s_lshl_b32 s0, s0, s4 2618; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 2619; GFX8-NEXT: s_lshl_b32 s0, s0, s2 2620; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 2621; GFX8-NEXT: s_bfe_u32 s2, s3, 0x100000 2622; GFX8-NEXT: s_lshr_b32 s1, s1, s2 2623; GFX8-NEXT: s_or_b32 s0, s0, s1 2624; GFX8-NEXT: ; return to shader part epilog 2625; 2626; GFX9-LABEL: s_fshr_i16: 2627; GFX9: ; %bb.0: 2628; GFX9-NEXT: s_and_b32 s3, s2, 15 2629; GFX9-NEXT: s_andn2_b32 s2, 15, s2 2630; GFX9-NEXT: s_bfe_u32 s4, 1, 0x100000 2631; GFX9-NEXT: s_lshl_b32 s0, s0, s4 2632; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 2633; GFX9-NEXT: s_lshl_b32 s0, s0, s2 2634; GFX9-NEXT: s_bfe_u32 s1, 
s1, 0x100000 2635; GFX9-NEXT: s_bfe_u32 s2, s3, 0x100000 2636; GFX9-NEXT: s_lshr_b32 s1, s1, s2 2637; GFX9-NEXT: s_or_b32 s0, s0, s1 2638; GFX9-NEXT: ; return to shader part epilog 2639; 2640; GFX10-LABEL: s_fshr_i16: 2641; GFX10: ; %bb.0: 2642; GFX10-NEXT: s_and_b32 s3, s2, 15 2643; GFX10-NEXT: s_bfe_u32 s4, 1, 0x100000 2644; GFX10-NEXT: s_andn2_b32 s2, 15, s2 2645; GFX10-NEXT: s_lshl_b32 s0, s0, s4 2646; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 2647; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 2648; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000 2649; GFX10-NEXT: s_lshl_b32 s0, s0, s2 2650; GFX10-NEXT: s_lshr_b32 s1, s1, s3 2651; GFX10-NEXT: s_or_b32 s0, s0, s1 2652; GFX10-NEXT: ; return to shader part epilog 2653 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 2654 ret i16 %result 2655} 2656 2657define amdgpu_ps i16 @s_fshr_i16_4(i16 inreg %lhs, i16 inreg %rhs) { 2658; GFX6-LABEL: s_fshr_i16_4: 2659; GFX6: ; %bb.0: 2660; GFX6-NEXT: s_and_b32 s1, s1, 0xffff 2661; GFX6-NEXT: s_lshl_b32 s0, s0, 12 2662; GFX6-NEXT: s_lshr_b32 s1, s1, 4 2663; GFX6-NEXT: s_or_b32 s0, s0, s1 2664; GFX6-NEXT: ; return to shader part epilog 2665; 2666; GFX8-LABEL: s_fshr_i16_4: 2667; GFX8: ; %bb.0: 2668; GFX8-NEXT: s_bfe_u32 s2, 12, 0x100000 2669; GFX8-NEXT: s_lshl_b32 s0, s0, s2 2670; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 2671; GFX8-NEXT: s_bfe_u32 s2, 4, 0x100000 2672; GFX8-NEXT: s_lshr_b32 s1, s1, s2 2673; GFX8-NEXT: s_or_b32 s0, s0, s1 2674; GFX8-NEXT: ; return to shader part epilog 2675; 2676; GFX9-LABEL: s_fshr_i16_4: 2677; GFX9: ; %bb.0: 2678; GFX9-NEXT: s_bfe_u32 s2, 12, 0x100000 2679; GFX9-NEXT: s_lshl_b32 s0, s0, s2 2680; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 2681; GFX9-NEXT: s_bfe_u32 s2, 4, 0x100000 2682; GFX9-NEXT: s_lshr_b32 s1, s1, s2 2683; GFX9-NEXT: s_or_b32 s0, s0, s1 2684; GFX9-NEXT: ; return to shader part epilog 2685; 2686; GFX10-LABEL: s_fshr_i16_4: 2687; GFX10: ; %bb.0: 2688; GFX10-NEXT: s_bfe_u32 s2, 12, 0x100000 2689; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 2690; 
GFX10-NEXT: s_bfe_u32 s3, 4, 0x100000 2691; GFX10-NEXT: s_lshl_b32 s0, s0, s2 2692; GFX10-NEXT: s_lshr_b32 s1, s1, s3 2693; GFX10-NEXT: s_or_b32 s0, s0, s1 2694; GFX10-NEXT: ; return to shader part epilog 2695 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4) 2696 ret i16 %result 2697} 2698 2699define amdgpu_ps i16 @s_fshr_i16_5(i16 inreg %lhs, i16 inreg %rhs) { 2700; GFX6-LABEL: s_fshr_i16_5: 2701; GFX6: ; %bb.0: 2702; GFX6-NEXT: s_and_b32 s1, s1, 0xffff 2703; GFX6-NEXT: s_lshl_b32 s0, s0, 11 2704; GFX6-NEXT: s_lshr_b32 s1, s1, 5 2705; GFX6-NEXT: s_or_b32 s0, s0, s1 2706; GFX6-NEXT: ; return to shader part epilog 2707; 2708; GFX8-LABEL: s_fshr_i16_5: 2709; GFX8: ; %bb.0: 2710; GFX8-NEXT: s_bfe_u32 s2, 11, 0x100000 2711; GFX8-NEXT: s_lshl_b32 s0, s0, s2 2712; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 2713; GFX8-NEXT: s_bfe_u32 s2, 5, 0x100000 2714; GFX8-NEXT: s_lshr_b32 s1, s1, s2 2715; GFX8-NEXT: s_or_b32 s0, s0, s1 2716; GFX8-NEXT: ; return to shader part epilog 2717; 2718; GFX9-LABEL: s_fshr_i16_5: 2719; GFX9: ; %bb.0: 2720; GFX9-NEXT: s_bfe_u32 s2, 11, 0x100000 2721; GFX9-NEXT: s_lshl_b32 s0, s0, s2 2722; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 2723; GFX9-NEXT: s_bfe_u32 s2, 5, 0x100000 2724; GFX9-NEXT: s_lshr_b32 s1, s1, s2 2725; GFX9-NEXT: s_or_b32 s0, s0, s1 2726; GFX9-NEXT: ; return to shader part epilog 2727; 2728; GFX10-LABEL: s_fshr_i16_5: 2729; GFX10: ; %bb.0: 2730; GFX10-NEXT: s_bfe_u32 s2, 11, 0x100000 2731; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 2732; GFX10-NEXT: s_bfe_u32 s3, 5, 0x100000 2733; GFX10-NEXT: s_lshl_b32 s0, s0, s2 2734; GFX10-NEXT: s_lshr_b32 s1, s1, s3 2735; GFX10-NEXT: s_or_b32 s0, s0, s1 2736; GFX10-NEXT: ; return to shader part epilog 2737 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5) 2738 ret i16 %result 2739} 2740 2741define i16 @v_fshr_i16(i16 %lhs, i16 %rhs, i16 %amt) { 2742; GFX6-LABEL: v_fshr_i16: 2743; GFX6: ; %bb.0: 2744; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2745; GFX6-NEXT: v_and_b32_e32 
v3, 15, v2 2746; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 2747; GFX6-NEXT: v_and_b32_e32 v2, 15, v2 2748; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2749; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16 2750; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 2751; GFX6-NEXT: v_bfe_u32 v2, v3, 0, 16 2752; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 2753; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 2754; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 2755; GFX6-NEXT: s_setpc_b64 s[30:31] 2756; 2757; GFX8-LABEL: v_fshr_i16: 2758; GFX8: ; %bb.0: 2759; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2760; GFX8-NEXT: v_and_b32_e32 v3, 15, v2 2761; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 2762; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 2763; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 2764; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 2765; GFX8-NEXT: v_lshrrev_b16_e32 v1, v3, v1 2766; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2767; GFX8-NEXT: s_setpc_b64 s[30:31] 2768; 2769; GFX9-LABEL: v_fshr_i16: 2770; GFX9: ; %bb.0: 2771; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2772; GFX9-NEXT: v_and_b32_e32 v3, 15, v2 2773; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 2774; GFX9-NEXT: v_and_b32_e32 v2, 15, v2 2775; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 2776; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 2777; GFX9-NEXT: v_lshrrev_b16_e32 v1, v3, v1 2778; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 2779; GFX9-NEXT: s_setpc_b64 s[30:31] 2780; 2781; GFX10-LABEL: v_fshr_i16: 2782; GFX10: ; %bb.0: 2783; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2784; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2785; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 2786; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 2787; GFX10-NEXT: v_and_b32_e32 v2, 15, v2 2788; GFX10-NEXT: v_and_b32_e32 v3, 15, v3 2789; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 2790; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0 2791; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 2792; GFX10-NEXT: s_setpc_b64 s[30:31] 2793 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 2794 ret i16 %result 2795} 2796 2797define i16 @v_fshr_i16_4(i16 %lhs, i16 %rhs) 
{ 2798; GFX6-LABEL: v_fshr_i16_4: 2799; GFX6: ; %bb.0: 2800; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2801; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 2802; GFX6-NEXT: v_lshlrev_b32_e32 v0, 12, v0 2803; GFX6-NEXT: v_lshrrev_b32_e32 v1, 4, v1 2804; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 2805; GFX6-NEXT: s_setpc_b64 s[30:31] 2806; 2807; GFX8-LABEL: v_fshr_i16_4: 2808; GFX8: ; %bb.0: 2809; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2810; GFX8-NEXT: v_lshlrev_b16_e32 v0, 12, v0 2811; GFX8-NEXT: v_lshrrev_b16_e32 v1, 4, v1 2812; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2813; GFX8-NEXT: s_setpc_b64 s[30:31] 2814; 2815; GFX9-LABEL: v_fshr_i16_4: 2816; GFX9: ; %bb.0: 2817; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2818; GFX9-NEXT: v_lshlrev_b16_e32 v0, 12, v0 2819; GFX9-NEXT: v_lshrrev_b16_e32 v1, 4, v1 2820; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 2821; GFX9-NEXT: s_setpc_b64 s[30:31] 2822; 2823; GFX10-LABEL: v_fshr_i16_4: 2824; GFX10: ; %bb.0: 2825; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2826; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2827; GFX10-NEXT: v_lshlrev_b16 v0, 12, v0 2828; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 2829; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 2830; GFX10-NEXT: s_setpc_b64 s[30:31] 2831 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4) 2832 ret i16 %result 2833} 2834 2835define i16 @v_fshr_i16_5(i16 %lhs, i16 %rhs) { 2836; GFX6-LABEL: v_fshr_i16_5: 2837; GFX6: ; %bb.0: 2838; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2839; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 2840; GFX6-NEXT: v_lshlrev_b32_e32 v0, 11, v0 2841; GFX6-NEXT: v_lshrrev_b32_e32 v1, 5, v1 2842; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 2843; GFX6-NEXT: s_setpc_b64 s[30:31] 2844; 2845; GFX8-LABEL: v_fshr_i16_5: 2846; GFX8: ; %bb.0: 2847; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2848; GFX8-NEXT: v_lshlrev_b16_e32 v0, 11, v0 2849; GFX8-NEXT: v_lshrrev_b16_e32 v1, 5, v1 2850; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2851; GFX8-NEXT: s_setpc_b64 s[30:31] 2852; 
2853; GFX9-LABEL: v_fshr_i16_5: 2854; GFX9: ; %bb.0: 2855; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2856; GFX9-NEXT: v_lshlrev_b16_e32 v0, 11, v0 2857; GFX9-NEXT: v_lshrrev_b16_e32 v1, 5, v1 2858; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 2859; GFX9-NEXT: s_setpc_b64 s[30:31] 2860; 2861; GFX10-LABEL: v_fshr_i16_5: 2862; GFX10: ; %bb.0: 2863; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2864; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2865; GFX10-NEXT: v_lshlrev_b16 v0, 11, v0 2866; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1 2867; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 2868; GFX10-NEXT: s_setpc_b64 s[30:31] 2869 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5) 2870 ret i16 %result 2871} 2872 2873define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) { 2874; GFX6-LABEL: v_fshr_i16_ssv: 2875; GFX6: ; %bb.0: 2876; GFX6-NEXT: v_and_b32_e32 v1, 15, v0 2877; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 2878; GFX6-NEXT: v_and_b32_e32 v0, 15, v0 2879; GFX6-NEXT: s_lshl_b32 s0, s0, 1 2880; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 2881; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0 2882; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 2883; GFX6-NEXT: s_and_b32 s0, s1, 0xffff 2884; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 2885; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 2886; GFX6-NEXT: ; return to shader part epilog 2887; 2888; GFX8-LABEL: v_fshr_i16_ssv: 2889; GFX8: ; %bb.0: 2890; GFX8-NEXT: v_and_b32_e32 v1, 15, v0 2891; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 2892; GFX8-NEXT: s_bfe_u32 s2, 1, 0x100000 2893; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 2894; GFX8-NEXT: s_lshl_b32 s0, s0, s2 2895; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0 2896; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s1 2897; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2898; GFX8-NEXT: ; return to shader part epilog 2899; 2900; GFX9-LABEL: v_fshr_i16_ssv: 2901; GFX9: ; %bb.0: 2902; GFX9-NEXT: v_and_b32_e32 v1, 15, v0 2903; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 2904; GFX9-NEXT: s_bfe_u32 s2, 1, 0x100000 2905; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 2906; 
GFX9-NEXT: s_lshl_b32 s0, s0, s2 2907; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0 2908; GFX9-NEXT: v_lshrrev_b16_e64 v1, v1, s1 2909; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 2910; GFX9-NEXT: ; return to shader part epilog 2911; 2912; GFX10-LABEL: v_fshr_i16_ssv: 2913; GFX10: ; %bb.0: 2914; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 2915; GFX10-NEXT: v_and_b32_e32 v0, 15, v0 2916; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000 2917; GFX10-NEXT: s_lshl_b32 s0, s0, s2 2918; GFX10-NEXT: v_and_b32_e32 v1, 15, v1 2919; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1 2920; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0 2921; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 2922; GFX10-NEXT: ; return to shader part epilog 2923 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 2924 %cast.result = bitcast i16 %result to half 2925 ret half %cast.result 2926} 2927 2928define amdgpu_ps half @v_fshr_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) { 2929; GFX6-LABEL: v_fshr_i16_svs: 2930; GFX6: ; %bb.0: 2931; GFX6-NEXT: s_and_b32 s2, s1, 15 2932; GFX6-NEXT: s_andn2_b32 s1, 15, s1 2933; GFX6-NEXT: s_lshl_b32 s0, s0, 1 2934; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 2935; GFX6-NEXT: s_lshl_b32 s0, s0, s1 2936; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000 2937; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 2938; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0 2939; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 2940; GFX6-NEXT: ; return to shader part epilog 2941; 2942; GFX8-LABEL: v_fshr_i16_svs: 2943; GFX8: ; %bb.0: 2944; GFX8-NEXT: s_and_b32 s2, s1, 15 2945; GFX8-NEXT: s_andn2_b32 s1, 15, s1 2946; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000 2947; GFX8-NEXT: s_lshl_b32 s0, s0, s3 2948; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 2949; GFX8-NEXT: s_lshl_b32 s0, s0, s1 2950; GFX8-NEXT: v_lshrrev_b16_e32 v0, s2, v0 2951; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 2952; GFX8-NEXT: ; return to shader part epilog 2953; 2954; GFX9-LABEL: v_fshr_i16_svs: 2955; GFX9: ; %bb.0: 2956; GFX9-NEXT: s_and_b32 s2, s1, 15 2957; GFX9-NEXT: s_andn2_b32 s1, 15, s1 2958; GFX9-NEXT: s_bfe_u32 s3, 1, 
0x100000 2959; GFX9-NEXT: s_lshl_b32 s0, s0, s3 2960; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 2961; GFX9-NEXT: s_lshl_b32 s0, s0, s1 2962; GFX9-NEXT: v_lshrrev_b16_e32 v0, s2, v0 2963; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 2964; GFX9-NEXT: ; return to shader part epilog 2965; 2966; GFX10-LABEL: v_fshr_i16_svs: 2967; GFX10: ; %bb.0: 2968; GFX10-NEXT: s_and_b32 s2, s1, 15 2969; GFX10-NEXT: s_bfe_u32 s3, 1, 0x100000 2970; GFX10-NEXT: s_andn2_b32 s1, 15, s1 2971; GFX10-NEXT: v_lshrrev_b16 v0, s2, v0 2972; GFX10-NEXT: s_lshl_b32 s0, s0, s3 2973; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 2974; GFX10-NEXT: s_lshl_b32 s0, s0, s1 2975; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 2976; GFX10-NEXT: ; return to shader part epilog 2977 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 2978 %cast.result = bitcast i16 %result to half 2979 ret half %cast.result 2980} 2981 2982define amdgpu_ps half @v_fshr_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) { 2983; GFX6-LABEL: v_fshr_i16_vss: 2984; GFX6: ; %bb.0: 2985; GFX6-NEXT: s_and_b32 s2, s1, 15 2986; GFX6-NEXT: s_andn2_b32 s1, 15, s1 2987; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2988; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 2989; GFX6-NEXT: v_lshlrev_b32_e32 v0, s1, v0 2990; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000 2991; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 2992; GFX6-NEXT: s_lshr_b32 s0, s0, s1 2993; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 2994; GFX6-NEXT: ; return to shader part epilog 2995; 2996; GFX8-LABEL: v_fshr_i16_vss: 2997; GFX8: ; %bb.0: 2998; GFX8-NEXT: s_and_b32 s2, s1, 15 2999; GFX8-NEXT: s_andn2_b32 s1, 15, s1 3000; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 3001; GFX8-NEXT: v_lshlrev_b16_e32 v0, s1, v0 3002; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 3003; GFX8-NEXT: s_bfe_u32 s1, s2, 0x100000 3004; GFX8-NEXT: s_lshr_b32 s0, s0, s1 3005; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 3006; GFX8-NEXT: ; return to shader part epilog 3007; 3008; GFX9-LABEL: v_fshr_i16_vss: 3009; GFX9: ; %bb.0: 3010; GFX9-NEXT: s_and_b32 s2, s1, 15 3011; GFX9-NEXT: 
s_andn2_b32 s1, 15, s1 3012; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 3013; GFX9-NEXT: v_lshlrev_b16_e32 v0, s1, v0 3014; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000 3015; GFX9-NEXT: s_bfe_u32 s1, s2, 0x100000 3016; GFX9-NEXT: s_lshr_b32 s0, s0, s1 3017; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 3018; GFX9-NEXT: ; return to shader part epilog 3019; 3020; GFX10-LABEL: v_fshr_i16_vss: 3021; GFX10: ; %bb.0: 3022; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 3023; GFX10-NEXT: s_andn2_b32 s2, 15, s1 3024; GFX10-NEXT: s_and_b32 s1, s1, 15 3025; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000 3026; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 3027; GFX10-NEXT: v_lshlrev_b16 v0, s2, v0 3028; GFX10-NEXT: s_lshr_b32 s0, s0, s1 3029; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 3030; GFX10-NEXT: ; return to shader part epilog 3031 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 3032 %cast.result = bitcast i16 %result to half 3033 ret half %cast.result 3034} 3035 3036define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { 3037; GFX6-LABEL: s_fshr_v2i16: 3038; GFX6: ; %bb.0: 3039; GFX6-NEXT: s_mov_b32 s6, 0xffff 3040; GFX6-NEXT: s_lshl_b32 s5, s5, 16 3041; GFX6-NEXT: s_and_b32 s4, s4, s6 3042; GFX6-NEXT: s_or_b32 s4, s5, s4 3043; GFX6-NEXT: s_bfe_u32 s5, 1, 0x100000 3044; GFX6-NEXT: s_lshl_b32 s0, s0, s5 3045; GFX6-NEXT: s_and_b32 s7, s2, s6 3046; GFX6-NEXT: s_lshl_b32 s1, s1, s5 3047; GFX6-NEXT: s_and_b32 s5, s3, s6 3048; GFX6-NEXT: s_lshr_b32 s7, s7, 15 3049; GFX6-NEXT: s_lshr_b32 s5, s5, 15 3050; GFX6-NEXT: s_lshl_b32 s2, s2, 1 3051; GFX6-NEXT: s_xor_b32 s4, s4, -1 3052; GFX6-NEXT: s_or_b32 s0, s0, s7 3053; GFX6-NEXT: s_or_b32 s1, s1, s5 3054; GFX6-NEXT: s_lshr_b32 s5, s4, 16 3055; GFX6-NEXT: s_and_b32 s7, s4, 15 3056; GFX6-NEXT: s_andn2_b32 s4, 15, s4 3057; GFX6-NEXT: s_and_b32 s2, s2, s6 3058; GFX6-NEXT: s_bfe_u32 s7, s7, 0x100000 3059; GFX6-NEXT: s_lshr_b32 s2, s2, 1 3060; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000 3061; GFX6-NEXT: s_lshl_b32 s0, s0, s7 3062; 
GFX6-NEXT: s_lshr_b32 s2, s2, s4 3063; GFX6-NEXT: s_or_b32 s0, s0, s2 3064; GFX6-NEXT: s_and_b32 s2, s5, 15 3065; GFX6-NEXT: s_lshl_b32 s3, s3, 1 3066; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 3067; GFX6-NEXT: s_andn2_b32 s4, 15, s5 3068; GFX6-NEXT: s_lshl_b32 s1, s1, s2 3069; GFX6-NEXT: s_and_b32 s2, s3, s6 3070; GFX6-NEXT: s_lshr_b32 s2, s2, 1 3071; GFX6-NEXT: s_bfe_u32 s3, s4, 0x100000 3072; GFX6-NEXT: s_lshr_b32 s2, s2, s3 3073; GFX6-NEXT: s_or_b32 s1, s1, s2 3074; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 3075; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 3076; GFX6-NEXT: s_lshl_b32 s1, s1, 16 3077; GFX6-NEXT: s_or_b32 s0, s0, s1 3078; GFX6-NEXT: ; return to shader part epilog 3079; 3080; GFX8-LABEL: s_fshr_v2i16: 3081; GFX8: ; %bb.0: 3082; GFX8-NEXT: s_bfe_u32 s5, 1, 0x100000 3083; GFX8-NEXT: s_bfe_u32 s6, s1, 0x100000 3084; GFX8-NEXT: s_bfe_u32 s7, 15, 0x100000 3085; GFX8-NEXT: s_lshr_b32 s3, s0, 16 3086; GFX8-NEXT: s_lshr_b32 s4, s1, 16 3087; GFX8-NEXT: s_lshl_b32 s0, s0, s5 3088; GFX8-NEXT: s_lshr_b32 s6, s6, s7 3089; GFX8-NEXT: s_or_b32 s0, s0, s6 3090; GFX8-NEXT: s_lshl_b32 s3, s3, s5 3091; GFX8-NEXT: s_lshr_b32 s6, s4, s7 3092; GFX8-NEXT: s_lshl_b32 s1, s1, s5 3093; GFX8-NEXT: s_xor_b32 s2, s2, -1 3094; GFX8-NEXT: s_or_b32 s3, s3, s6 3095; GFX8-NEXT: s_lshr_b32 s6, s2, 16 3096; GFX8-NEXT: s_and_b32 s7, s2, 15 3097; GFX8-NEXT: s_andn2_b32 s2, 15, s2 3098; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3099; GFX8-NEXT: s_bfe_u32 s7, s7, 0x100000 3100; GFX8-NEXT: s_lshr_b32 s1, s1, s5 3101; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3102; GFX8-NEXT: s_lshl_b32 s0, s0, s7 3103; GFX8-NEXT: s_lshr_b32 s1, s1, s2 3104; GFX8-NEXT: s_or_b32 s0, s0, s1 3105; GFX8-NEXT: s_and_b32 s1, s6, 15 3106; GFX8-NEXT: s_lshl_b32 s4, s4, s5 3107; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3108; GFX8-NEXT: s_andn2_b32 s2, 15, s6 3109; GFX8-NEXT: s_lshl_b32 s1, s3, s1 3110; GFX8-NEXT: s_bfe_u32 s3, s4, 0x100000 3111; GFX8-NEXT: s_lshr_b32 s3, s3, s5 3112; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3113; GFX8-NEXT: 
s_lshr_b32 s2, s3, s2 3114; GFX8-NEXT: s_or_b32 s1, s1, s2 3115; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3116; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 3117; GFX8-NEXT: s_lshl_b32 s1, s1, 16 3118; GFX8-NEXT: s_or_b32 s0, s0, s1 3119; GFX8-NEXT: ; return to shader part epilog 3120; 3121; GFX9-LABEL: s_fshr_v2i16: 3122; GFX9: ; %bb.0: 3123; GFX9-NEXT: s_mov_b32 s3, 0xf000f 3124; GFX9-NEXT: s_and_b32 s4, s2, s3 3125; GFX9-NEXT: s_andn2_b32 s2, s3, s2 3126; GFX9-NEXT: s_lshr_b32 s3, s0, 16 3127; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 3128; GFX9-NEXT: s_lshl_b32 s3, s3, 1 3129; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3 3130; GFX9-NEXT: s_lshr_b32 s3, s0, 16 3131; GFX9-NEXT: s_lshr_b32 s5, s2, 16 3132; GFX9-NEXT: s_lshl_b32 s0, s0, s2 3133; GFX9-NEXT: s_lshl_b32 s2, s3, s5 3134; GFX9-NEXT: s_mov_b32 s3, 0xffff 3135; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3136; GFX9-NEXT: s_lshr_b32 s2, s1, 16 3137; GFX9-NEXT: s_and_b32 s1, s1, s3 3138; GFX9-NEXT: s_lshr_b32 s5, s4, 16 3139; GFX9-NEXT: s_and_b32 s3, s4, s3 3140; GFX9-NEXT: s_lshr_b32 s1, s1, s3 3141; GFX9-NEXT: s_lshr_b32 s2, s2, s5 3142; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2 3143; GFX9-NEXT: s_or_b32 s0, s0, s1 3144; GFX9-NEXT: ; return to shader part epilog 3145; 3146; GFX10-LABEL: s_fshr_v2i16: 3147; GFX10: ; %bb.0: 3148; GFX10-NEXT: s_lshr_b32 s4, s0, 16 3149; GFX10-NEXT: s_mov_b32 s3, 0xf000f 3150; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 3151; GFX10-NEXT: s_lshl_b32 s4, s4, 1 3152; GFX10-NEXT: s_and_b32 s5, s2, s3 3153; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 3154; GFX10-NEXT: s_andn2_b32 s2, s3, s2 3155; GFX10-NEXT: s_lshr_b32 s3, s0, 16 3156; GFX10-NEXT: s_lshr_b32 s4, s2, 16 3157; GFX10-NEXT: s_lshl_b32 s0, s0, s2 3158; GFX10-NEXT: s_lshl_b32 s2, s3, s4 3159; GFX10-NEXT: s_mov_b32 s3, 0xffff 3160; GFX10-NEXT: s_lshr_b32 s4, s1, 16 3161; GFX10-NEXT: s_and_b32 s1, s1, s3 3162; GFX10-NEXT: s_and_b32 s3, s5, s3 3163; GFX10-NEXT: s_lshr_b32 s5, s5, 16 3164; GFX10-NEXT: s_lshr_b32 s1, s1, s3 3165; GFX10-NEXT: s_lshr_b32 s3, 
s4, s5 3166; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3167; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s3 3168; GFX10-NEXT: s_or_b32 s0, s0, s1 3169; GFX10-NEXT: ; return to shader part epilog 3170 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3171 %cast = bitcast <2 x i16> %result to i32 3172 ret i32 %cast 3173} 3174 3175define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) { 3176; GFX6-LABEL: v_fshr_v2i16: 3177; GFX6: ; %bb.0: 3178; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3179; GFX6-NEXT: v_mov_b32_e32 v6, 0xffff 3180; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5 3181; GFX6-NEXT: v_and_b32_e32 v4, v4, v6 3182; GFX6-NEXT: s_mov_b32 s5, 0xffff 3183; GFX6-NEXT: v_or_b32_e32 v4, v5, v4 3184; GFX6-NEXT: s_bfe_u32 s4, 1, 0x100000 3185; GFX6-NEXT: v_and_b32_e32 v5, s5, v2 3186; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0 3187; GFX6-NEXT: v_lshrrev_b32_e32 v5, 15, v5 3188; GFX6-NEXT: v_or_b32_e32 v0, v0, v5 3189; GFX6-NEXT: v_and_b32_e32 v5, s5, v3 3190; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1 3191; GFX6-NEXT: v_lshrrev_b32_e32 v5, 15, v5 3192; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 3193; GFX6-NEXT: v_or_b32_e32 v1, v1, v5 3194; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2 3195; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v4 3196; GFX6-NEXT: v_and_b32_e32 v7, 15, v4 3197; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 3198; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 3199; GFX6-NEXT: v_and_b32_e32 v2, v2, v6 3200; GFX6-NEXT: v_bfe_u32 v7, v7, 0, 16 3201; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3202; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16 3203; GFX6-NEXT: v_lshlrev_b32_e32 v0, v7, v0 3204; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 3205; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 3206; GFX6-NEXT: v_and_b32_e32 v2, 15, v5 3207; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 3208; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5 3209; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16 3210; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 3211; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1 3212; GFX6-NEXT: 
v_and_b32_e32 v2, v3, v6 3213; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3214; GFX6-NEXT: v_bfe_u32 v3, v4, 0, 16 3215; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2 3216; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 3217; GFX6-NEXT: s_setpc_b64 s[30:31] 3218; 3219; GFX8-LABEL: v_fshr_v2i16: 3220; GFX8: ; %bb.0: 3221; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3222; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0 3223; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v1 3224; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 3225; GFX8-NEXT: v_mov_b32_e32 v4, 1 3226; GFX8-NEXT: v_mov_b32_e32 v5, 15 3227; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3228; GFX8-NEXT: v_lshrrev_b16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3229; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 3230; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 3231; GFX8-NEXT: v_lshlrev_b16_e32 v5, 1, v1 3232; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3233; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2 3234; GFX8-NEXT: v_and_b32_e32 v6, 15, v2 3235; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 3236; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 3237; GFX8-NEXT: v_lshrrev_b16_e32 v5, 1, v5 3238; GFX8-NEXT: v_lshlrev_b16_e32 v3, v6, v3 3239; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v5 3240; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 3241; GFX8-NEXT: v_and_b32_e32 v3, 15, v4 3242; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 3243; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 3244; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1 3245; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0 3246; GFX8-NEXT: v_lshrrev_b16_e32 v1, v4, v1 3247; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3248; GFX8-NEXT: v_mov_b32_e32 v1, 16 3249; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 3250; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3251; GFX8-NEXT: s_setpc_b64 s[30:31] 3252; 3253; 
GFX9-LABEL: v_fshr_v2i16: 3254; GFX9: ; %bb.0: 3255; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3256; GFX9-NEXT: s_mov_b32 s4, 0xf000f 3257; GFX9-NEXT: v_and_b32_e32 v3, s4, v2 3258; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 3259; GFX9-NEXT: v_and_b32_e32 v2, s4, v2 3260; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 3261; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0 3262; GFX9-NEXT: v_pk_lshrrev_b16 v1, v3, v1 3263; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3264; GFX9-NEXT: s_setpc_b64 s[30:31] 3265; 3266; GFX10-LABEL: v_fshr_v2i16: 3267; GFX10: ; %bb.0: 3268; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3269; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3270; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 3271; GFX10-NEXT: s_mov_b32 s4, 0xf000f 3272; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 3273; GFX10-NEXT: v_and_b32_e32 v2, s4, v2 3274; GFX10-NEXT: v_and_b32_e32 v3, s4, v3 3275; GFX10-NEXT: v_pk_lshrrev_b16 v1, v2, v1 3276; GFX10-NEXT: v_pk_lshlrev_b16 v0, v3, v0 3277; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3278; GFX10-NEXT: s_setpc_b64 s[30:31] 3279 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3280 ret <2 x i16> %result 3281} 3282 3283define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) { 3284; GFX6-LABEL: v_fshr_v2i16_4_8: 3285; GFX6: ; %bb.0: 3286; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3287; GFX6-NEXT: s_bfe_u32 s4, 12, 0x100000 3288; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0 3289; GFX6-NEXT: s_mov_b32 s4, 0xffff 3290; GFX6-NEXT: v_and_b32_e32 v2, s4, v2 3291; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3292; GFX6-NEXT: s_bfe_u32 s5, 3, 0x100000 3293; GFX6-NEXT: v_lshrrev_b32_e32 v2, s5, v2 3294; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 3295; GFX6-NEXT: v_and_b32_e32 v2, s4, v3 3296; GFX6-NEXT: s_bfe_u32 s5, 8, 0x100000 3297; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3298; GFX6-NEXT: s_bfe_u32 s4, 7, 0x100000 3299; GFX6-NEXT: v_lshlrev_b32_e32 v1, s5, v1 3300; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2 3301; 
GFX6-NEXT: v_or_b32_e32 v1, v1, v2 3302; GFX6-NEXT: s_setpc_b64 s[30:31] 3303; 3304; GFX8-LABEL: v_fshr_v2i16_4_8: 3305; GFX8: ; %bb.0: 3306; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3307; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0 3308; GFX8-NEXT: v_lshlrev_b16_e32 v0, 12, v0 3309; GFX8-NEXT: v_lshrrev_b16_e32 v3, 4, v1 3310; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 3311; GFX8-NEXT: v_mov_b32_e32 v3, 8 3312; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v2 3313; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3314; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 3315; GFX8-NEXT: v_mov_b32_e32 v2, 16 3316; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 3317; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3318; GFX8-NEXT: s_setpc_b64 s[30:31] 3319; 3320; GFX9-LABEL: v_fshr_v2i16_4_8: 3321; GFX9: ; %bb.0: 3322; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3323; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 16 3324; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 3325; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 16 3326; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 3327; GFX9-NEXT: s_mov_b32 s4, 0x4f7ffffe 3328; GFX9-NEXT: v_mul_f32_e32 v2, s4, v2 3329; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 3330; GFX9-NEXT: v_mul_f32_e32 v3, s4, v3 3331; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 3332; GFX9-NEXT: v_mul_lo_u32 v4, -16, v2 3333; GFX9-NEXT: v_mul_lo_u32 v5, -16, v3 3334; GFX9-NEXT: v_mul_hi_u32 v4, v2, v4 3335; GFX9-NEXT: v_mul_hi_u32 v5, v3, v5 3336; GFX9-NEXT: v_add_u32_e32 v2, v2, v4 3337; GFX9-NEXT: v_mul_hi_u32 v2, 4, v2 3338; GFX9-NEXT: v_add_u32_e32 v3, v3, v5 3339; GFX9-NEXT: v_mul_hi_u32 v3, 8, v3 3340; GFX9-NEXT: v_lshlrev_b32_e32 v2, 4, v2 3341; GFX9-NEXT: v_sub_u32_e32 v2, 4, v2 3342; GFX9-NEXT: v_subrev_u32_e32 v4, 16, v2 3343; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 16, v2 3344; GFX9-NEXT: v_lshlrev_b32_e32 v3, 4, v3 3345; GFX9-NEXT: 
v_cndmask_b32_e32 v2, v2, v4, vcc 3346; GFX9-NEXT: v_subrev_u32_e32 v4, 16, v2 3347; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 16, v2 3348; GFX9-NEXT: v_sub_u32_e32 v3, 8, v3 3349; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 3350; GFX9-NEXT: v_subrev_u32_e32 v4, 16, v3 3351; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 16, v3 3352; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 3353; GFX9-NEXT: v_subrev_u32_e32 v4, 16, v3 3354; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 16, v3 3355; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 3356; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff 3357; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3358; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v3 3359; GFX9-NEXT: v_pk_sub_i16 v3, 16, v2 op_sel_hi:[0,1] 3360; GFX9-NEXT: v_pk_lshlrev_b16 v0, v3, v0 3361; GFX9-NEXT: v_pk_lshrrev_b16 v1, v2, v1 3362; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3363; GFX9-NEXT: s_setpc_b64 s[30:31] 3364; 3365; GFX10-LABEL: v_fshr_v2i16_4_8: 3366; GFX10: ; %bb.0: 3367; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3368; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3369; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v2, 16 3370; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 16 3371; GFX10-NEXT: s_mov_b32 s4, 0x4f7ffffe 3372; GFX10-NEXT: v_rcp_iflag_f32_e32 v2, v2 3373; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 3374; GFX10-NEXT: v_mul_f32_e32 v2, s4, v2 3375; GFX10-NEXT: v_mul_f32_e32 v3, s4, v3 3376; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2 3377; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 3378; GFX10-NEXT: v_mul_lo_u32 v4, -16, v2 3379; GFX10-NEXT: v_mul_lo_u32 v5, -16, v3 3380; GFX10-NEXT: v_mul_hi_u32 v4, v2, v4 3381; GFX10-NEXT: v_mul_hi_u32 v5, v3, v5 3382; GFX10-NEXT: v_add_nc_u32_e32 v2, v2, v4 3383; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v5 3384; GFX10-NEXT: v_mul_hi_u32 v2, 8, v2 3385; GFX10-NEXT: v_mul_hi_u32 v3, 4, v3 3386; GFX10-NEXT: v_lshlrev_b32_e32 v2, 4, v2 3387; GFX10-NEXT: v_lshlrev_b32_e32 v3, 4, v3 3388; GFX10-NEXT: v_sub_nc_u32_e32 v2, 8, v2 3389; GFX10-NEXT: v_sub_nc_u32_e32 v3, 4, v3 3390; GFX10-NEXT: v_subrev_nc_u32_e32 v4, 16, v2 
3391; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v2 3392; GFX10-NEXT: v_subrev_nc_u32_e32 v5, 16, v3 3393; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo 3394; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v3 3395; GFX10-NEXT: v_subrev_nc_u32_e32 v4, 16, v2 3396; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo 3397; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v2 3398; GFX10-NEXT: v_subrev_nc_u32_e32 v5, 16, v3 3399; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo 3400; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v3 3401; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3402; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo 3403; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v3, v2 3404; GFX10-NEXT: v_pk_sub_i16 v3, 16, v2 op_sel_hi:[0,1] 3405; GFX10-NEXT: v_pk_lshrrev_b16 v1, v2, v1 3406; GFX10-NEXT: v_pk_lshlrev_b16 v0, v3, v0 3407; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3408; GFX10-NEXT: s_setpc_b64 s[30:31] 3409 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>) 3410 ret <2 x i16> %result 3411} 3412; amdgpu_ps call of llvm.fshr.v2i16 where %lhs and %rhs are inreg and %amt is not; the result is bitcast to float. 3413define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) { 3414; GFX6-LABEL: v_fshr_v2i16_ssv: 3415; GFX6: ; %bb.0: 3416; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3417; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 3418; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 3419; GFX6-NEXT: s_mov_b32 s5, 0xffff 3420; GFX6-NEXT: s_bfe_u32 s4, 1, 0x100000 3421; GFX6-NEXT: s_and_b32 s6, s2, s5 3422; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 3423; GFX6-NEXT: s_lshl_b32 s0, s0, s4 3424; GFX6-NEXT: s_lshr_b32 s6, s6, 15 3425; GFX6-NEXT: v_and_b32_e32 v2, 15, v0 3426; GFX6-NEXT: s_or_b32 s0, s0, s6 3427; GFX6-NEXT: s_lshl_b32 s2, s2, 1 3428; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 3429; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 3430; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16 3431; GFX6-NEXT: v_and_b32_e32 v0, 15, v0 3432; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 3433; GFX6-NEXT: s_and_b32 s0, s2, s5 3434; GFX6-NEXT: s_lshr_b32 s0, s0, 1 3435; GFX6-NEXT: 
v_bfe_u32 v0, v0, 0, 16 3436; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0 3437; GFX6-NEXT: s_lshl_b32 s1, s1, s4 3438; GFX6-NEXT: s_and_b32 s4, s3, s5 3439; GFX6-NEXT: s_lshl_b32 s3, s3, 1 3440; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 3441; GFX6-NEXT: v_and_b32_e32 v2, 15, v1 3442; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1 3443; GFX6-NEXT: s_lshr_b32 s4, s4, 15 3444; GFX6-NEXT: v_and_b32_e32 v1, 15, v1 3445; GFX6-NEXT: s_and_b32 s0, s3, s5 3446; GFX6-NEXT: s_or_b32 s1, s1, s4 3447; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16 3448; GFX6-NEXT: s_lshr_b32 s0, s0, 1 3449; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 3450; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2 3451; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 3452; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 3453; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 3454; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 3455; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3456; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3457; GFX6-NEXT: ; return to shader part epilog 3458; 3459; GFX8-LABEL: v_fshr_v2i16_ssv: 3460; GFX8: ; %bb.0: 3461; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000 3462; GFX8-NEXT: s_bfe_u32 s5, s1, 0x100000 3463; GFX8-NEXT: s_bfe_u32 s6, 15, 0x100000 3464; GFX8-NEXT: s_lshr_b32 s2, s0, 16 3465; GFX8-NEXT: s_lshl_b32 s0, s0, s4 3466; GFX8-NEXT: s_lshr_b32 s5, s5, s6 3467; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 3468; GFX8-NEXT: s_lshr_b32 s3, s1, 16 3469; GFX8-NEXT: s_or_b32 s0, s0, s5 3470; GFX8-NEXT: s_lshl_b32 s1, s1, s4 3471; GFX8-NEXT: v_and_b32_e32 v2, 15, v0 3472; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0 3473; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 3474; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s0 3475; GFX8-NEXT: s_bfe_u32 s0, s1, 0x100000 3476; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 3477; GFX8-NEXT: s_lshr_b32 s0, s0, s4 3478; GFX8-NEXT: s_lshr_b32 s5, s3, s6 3479; GFX8-NEXT: s_lshl_b32 s3, s3, s4 3480; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0 3481; GFX8-NEXT: s_lshl_b32 s2, s2, s4 3482; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 3483; GFX8-NEXT: v_and_b32_e32 v2, 15, v1 3484; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1 3485; 
GFX8-NEXT: s_bfe_u32 s0, s3, 0x100000 3486; GFX8-NEXT: s_or_b32 s2, s2, s5 3487; GFX8-NEXT: v_and_b32_e32 v1, 15, v1 3488; GFX8-NEXT: s_lshr_b32 s0, s0, s4 3489; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2 3490; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s0 3491; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 3492; GFX8-NEXT: v_mov_b32_e32 v2, 16 3493; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 3494; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3495; GFX8-NEXT: ; return to shader part epilog 3496; 3497; GFX9-LABEL: v_fshr_v2i16_ssv: 3498; GFX9: ; %bb.0: 3499; GFX9-NEXT: s_mov_b32 s2, 0xf000f 3500; GFX9-NEXT: v_and_b32_e32 v1, s2, v0 3501; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 3502; GFX9-NEXT: v_and_b32_e32 v0, s2, v0 3503; GFX9-NEXT: s_lshr_b32 s2, s0, 16 3504; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 3505; GFX9-NEXT: s_lshl_b32 s2, s2, 1 3506; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3507; GFX9-NEXT: v_pk_lshlrev_b16 v0, v0, s0 3508; GFX9-NEXT: v_pk_lshrrev_b16 v1, v1, s1 3509; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3510; GFX9-NEXT: ; return to shader part epilog 3511; 3512; GFX10-LABEL: v_fshr_v2i16_ssv: 3513; GFX10: ; %bb.0: 3514; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 3515; GFX10-NEXT: s_mov_b32 s2, 0xf000f 3516; GFX10-NEXT: s_lshr_b32 s3, s0, 16 3517; GFX10-NEXT: v_and_b32_e32 v0, s2, v0 3518; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 3519; GFX10-NEXT: v_and_b32_e32 v1, s2, v1 3520; GFX10-NEXT: s_lshl_b32 s2, s3, 1 3521; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3522; GFX10-NEXT: v_pk_lshrrev_b16 v0, v0, s1 3523; GFX10-NEXT: v_pk_lshlrev_b16 v1, v1, s0 3524; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 3525; GFX10-NEXT: ; return to shader part epilog 3526 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3527 %cast = bitcast <2 x i16> %result to float 3528 ret float %cast 3529} 3530; amdgpu_ps call of llvm.fshr.v2i16 where %lhs and %amt are inreg and %rhs is not; the result is bitcast to float. 3531define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg 
%lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) { 3532; GFX6-LABEL: v_fshr_v2i16_svs: 3533; GFX6: ; %bb.0: 3534; GFX6-NEXT: s_mov_b32 s4, 0xffff 3535; GFX6-NEXT: s_lshl_b32 s3, s3, 16 3536; GFX6-NEXT: s_and_b32 s2, s2, s4 3537; GFX6-NEXT: s_or_b32 s2, s3, s2 3538; GFX6-NEXT: s_bfe_u32 s3, 1, 0x100000 3539; GFX6-NEXT: v_and_b32_e32 v2, s4, v0 3540; GFX6-NEXT: s_lshl_b32 s0, s0, s3 3541; GFX6-NEXT: v_lshrrev_b32_e32 v2, 15, v2 3542; GFX6-NEXT: v_and_b32_e32 v3, s4, v1 3543; GFX6-NEXT: v_or_b32_e32 v2, s0, v2 3544; GFX6-NEXT: s_lshl_b32 s0, s1, s3 3545; GFX6-NEXT: v_lshrrev_b32_e32 v3, 15, v3 3546; GFX6-NEXT: v_or_b32_e32 v3, s0, v3 3547; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 3548; GFX6-NEXT: s_xor_b32 s0, s2, -1 3549; GFX6-NEXT: s_lshr_b32 s1, s0, 16 3550; GFX6-NEXT: s_and_b32 s2, s0, 15 3551; GFX6-NEXT: s_andn2_b32 s0, 15, s0 3552; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 3553; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3554; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 3555; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 3556; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 3557; GFX6-NEXT: v_lshrrev_b32_e32 v0, s0, v0 3558; GFX6-NEXT: s_and_b32 s0, s1, 15 3559; GFX6-NEXT: v_lshlrev_b32_e32 v2, s2, v2 3560; GFX6-NEXT: s_andn2_b32 s1, 15, s1 3561; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 3562; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 3563; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 3564; GFX6-NEXT: v_lshlrev_b32_e32 v2, s0, v3 3565; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3566; GFX6-NEXT: s_bfe_u32 s0, s1, 0x100000 3567; GFX6-NEXT: v_lshrrev_b32_e32 v1, s0, v1 3568; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 3569; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 3570; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 3571; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3572; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3573; GFX6-NEXT: ; return to shader part epilog 3574; 3575; GFX8-LABEL: v_fshr_v2i16_svs: 3576; GFX8: ; %bb.0: 3577; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000 3578; GFX8-NEXT: s_lshr_b32 s2, s0, 16 3579; GFX8-NEXT: s_lshl_b32 s0, s0, s3 3580; GFX8-NEXT: 
v_lshrrev_b16_e32 v1, 15, v0 3581; GFX8-NEXT: v_mov_b32_e32 v2, 15 3582; GFX8-NEXT: v_or_b32_e32 v1, s0, v1 3583; GFX8-NEXT: s_lshl_b32 s0, s2, s3 3584; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3585; GFX8-NEXT: v_or_b32_e32 v2, s0, v2 3586; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0 3587; GFX8-NEXT: v_mov_b32_e32 v4, 1 3588; GFX8-NEXT: s_xor_b32 s0, s1, -1 3589; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3590; GFX8-NEXT: s_lshr_b32 s1, s0, 16 3591; GFX8-NEXT: s_and_b32 s2, s0, 15 3592; GFX8-NEXT: s_andn2_b32 s0, 15, s0 3593; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3 3594; GFX8-NEXT: v_lshrrev_b16_e32 v3, s0, v3 3595; GFX8-NEXT: s_and_b32 s0, s1, 15 3596; GFX8-NEXT: s_andn2_b32 s1, 15, s1 3597; GFX8-NEXT: v_lshrrev_b16_e32 v0, 1, v0 3598; GFX8-NEXT: v_lshlrev_b16_e32 v2, s0, v2 3599; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0 3600; GFX8-NEXT: v_lshlrev_b16_e32 v1, s2, v1 3601; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 3602; GFX8-NEXT: v_mov_b32_e32 v2, 16 3603; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 3604; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 3605; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3606; GFX8-NEXT: ; return to shader part epilog 3607; 3608; GFX9-LABEL: v_fshr_v2i16_svs: 3609; GFX9: ; %bb.0: 3610; GFX9-NEXT: s_mov_b32 s2, 0xf000f 3611; GFX9-NEXT: s_and_b32 s3, s1, s2 3612; GFX9-NEXT: s_andn2_b32 s1, s2, s1 3613; GFX9-NEXT: s_lshr_b32 s2, s0, 16 3614; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 3615; GFX9-NEXT: s_lshl_b32 s2, s2, 1 3616; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3617; GFX9-NEXT: s_lshr_b32 s2, s0, 16 3618; GFX9-NEXT: s_lshr_b32 s4, s1, 16 3619; GFX9-NEXT: s_lshl_b32 s0, s0, s1 3620; GFX9-NEXT: s_lshl_b32 s1, s2, s4 3621; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 3622; GFX9-NEXT: v_pk_lshrrev_b16 v0, s3, v0 3623; 
GFX9-NEXT: v_or_b32_e32 v0, s0, v0 3624; GFX9-NEXT: ; return to shader part epilog 3625; 3626; GFX10-LABEL: v_fshr_v2i16_svs: 3627; GFX10: ; %bb.0: 3628; GFX10-NEXT: s_lshr_b32 s3, s0, 16 3629; GFX10-NEXT: s_mov_b32 s2, 0xf000f 3630; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 3631; GFX10-NEXT: s_lshl_b32 s3, s3, 1 3632; GFX10-NEXT: s_and_b32 s4, s1, s2 3633; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s3 3634; GFX10-NEXT: s_andn2_b32 s1, s2, s1 3635; GFX10-NEXT: s_lshr_b32 s2, s0, 16 3636; GFX10-NEXT: s_lshr_b32 s3, s1, 16 3637; GFX10-NEXT: v_pk_lshrrev_b16 v0, s4, v0 3638; GFX10-NEXT: s_lshl_b32 s0, s0, s1 3639; GFX10-NEXT: s_lshl_b32 s1, s2, s3 3640; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 3641; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 3642; GFX10-NEXT: ; return to shader part epilog 3643 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3644 %cast = bitcast <2 x i16> %result to float 3645 ret float %cast 3646} 3647; amdgpu_ps call of llvm.fshr.v2i16 where %rhs and %amt are inreg and %lhs is not; the result is bitcast to float. 3648define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { 3649; GFX6-LABEL: v_fshr_v2i16_vss: 3650; GFX6: ; %bb.0: 3651; GFX6-NEXT: s_mov_b32 s4, 0xffff 3652; GFX6-NEXT: s_lshl_b32 s3, s3, 16 3653; GFX6-NEXT: s_and_b32 s2, s2, s4 3654; GFX6-NEXT: s_or_b32 s2, s3, s2 3655; GFX6-NEXT: s_bfe_u32 s3, 1, 0x100000 3656; GFX6-NEXT: v_lshlrev_b32_e32 v0, s3, v0 3657; GFX6-NEXT: s_and_b32 s5, s0, s4 3658; GFX6-NEXT: v_lshlrev_b32_e32 v1, s3, v1 3659; GFX6-NEXT: s_and_b32 s3, s1, s4 3660; GFX6-NEXT: s_lshr_b32 s5, s5, 15 3661; GFX6-NEXT: s_lshr_b32 s3, s3, 15 3662; GFX6-NEXT: s_lshl_b32 s0, s0, 1 3663; GFX6-NEXT: s_xor_b32 s2, s2, -1 3664; GFX6-NEXT: v_or_b32_e32 v0, s5, v0 3665; GFX6-NEXT: v_or_b32_e32 v1, s3, v1 3666; GFX6-NEXT: s_lshr_b32 s3, s2, 16 3667; GFX6-NEXT: s_and_b32 s5, s2, 15 3668; GFX6-NEXT: s_andn2_b32 s2, 15, s2 3669; GFX6-NEXT: s_and_b32 s0, s0, s4 3670; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000 3671; GFX6-NEXT: s_lshr_b32 s0, s0, 1 3672; GFX6-NEXT: s_bfe_u32 s2, s2, 
0x100000 3673; GFX6-NEXT: v_lshlrev_b32_e32 v0, s5, v0 3674; GFX6-NEXT: s_lshr_b32 s0, s0, s2 3675; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 3676; GFX6-NEXT: s_and_b32 s0, s3, 15 3677; GFX6-NEXT: s_lshl_b32 s1, s1, 1 3678; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 3679; GFX6-NEXT: s_andn2_b32 s2, 15, s3 3680; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1 3681; GFX6-NEXT: s_and_b32 s0, s1, s4 3682; GFX6-NEXT: s_lshr_b32 s0, s0, 1 3683; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000 3684; GFX6-NEXT: s_lshr_b32 s0, s0, s1 3685; GFX6-NEXT: v_or_b32_e32 v1, s0, v1 3686; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 3687; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 3688; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3689; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3690; GFX6-NEXT: ; return to shader part epilog 3691; 3692; GFX8-LABEL: v_fshr_v2i16_vss: 3693; GFX8: ; %bb.0: 3694; GFX8-NEXT: s_bfe_u32 s3, s0, 0x100000 3695; GFX8-NEXT: s_bfe_u32 s4, 15, 0x100000 3696; GFX8-NEXT: s_lshr_b32 s2, s0, 16 3697; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v0 3698; GFX8-NEXT: s_lshr_b32 s3, s3, s4 3699; GFX8-NEXT: v_mov_b32_e32 v2, 1 3700; GFX8-NEXT: v_or_b32_e32 v1, s3, v1 3701; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3702; GFX8-NEXT: s_lshr_b32 s3, s2, s4 3703; GFX8-NEXT: v_or_b32_e32 v0, s3, v0 3704; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000 3705; GFX8-NEXT: s_lshl_b32 s0, s0, s3 3706; GFX8-NEXT: s_xor_b32 s1, s1, -1 3707; GFX8-NEXT: s_lshr_b32 s4, s1, 16 3708; GFX8-NEXT: s_and_b32 s5, s1, 15 3709; GFX8-NEXT: s_andn2_b32 s1, 15, s1 3710; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 3711; GFX8-NEXT: s_lshr_b32 s0, s0, s3 3712; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3713; GFX8-NEXT: v_lshlrev_b16_e32 v1, s5, v1 3714; GFX8-NEXT: s_lshr_b32 s0, s0, s1 3715; GFX8-NEXT: s_lshl_b32 s2, s2, s3 3716; GFX8-NEXT: v_or_b32_e32 v1, s0, v1 3717; GFX8-NEXT: s_and_b32 s0, s4, 15 3718; GFX8-NEXT: s_andn2_b32 s1, 15, s4 3719; GFX8-NEXT: v_lshlrev_b16_e32 v0, s0, v0 3720; GFX8-NEXT: s_bfe_u32 s0, s2, 
0x100000 3721; GFX8-NEXT: s_lshr_b32 s0, s0, s3 3722; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3723; GFX8-NEXT: s_lshr_b32 s0, s0, s1 3724; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 3725; GFX8-NEXT: v_mov_b32_e32 v2, 16 3726; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 3727; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3728; GFX8-NEXT: ; return to shader part epilog 3729; 3730; GFX9-LABEL: v_fshr_v2i16_vss: 3731; GFX9: ; %bb.0: 3732; GFX9-NEXT: s_mov_b32 s2, 0xf000f 3733; GFX9-NEXT: s_and_b32 s3, s1, s2 3734; GFX9-NEXT: s_andn2_b32 s1, s2, s1 3735; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 3736; GFX9-NEXT: s_mov_b32 s2, 0xffff 3737; GFX9-NEXT: v_pk_lshlrev_b16 v0, s1, v0 3738; GFX9-NEXT: s_lshr_b32 s1, s0, 16 3739; GFX9-NEXT: s_and_b32 s0, s0, s2 3740; GFX9-NEXT: s_lshr_b32 s4, s3, 16 3741; GFX9-NEXT: s_and_b32 s2, s3, s2 3742; GFX9-NEXT: s_lshr_b32 s0, s0, s2 3743; GFX9-NEXT: s_lshr_b32 s1, s1, s4 3744; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 3745; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 3746; GFX9-NEXT: ; return to shader part epilog 3747; 3748; GFX10-LABEL: v_fshr_v2i16_vss: 3749; GFX10: ; %bb.0: 3750; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 3751; GFX10-NEXT: s_mov_b32 s2, 0xf000f 3752; GFX10-NEXT: s_mov_b32 s3, 0xffff 3753; GFX10-NEXT: s_and_b32 s4, s1, s2 3754; GFX10-NEXT: s_andn2_b32 s1, s2, s1 3755; GFX10-NEXT: s_lshr_b32 s2, s0, 16 3756; GFX10-NEXT: s_and_b32 s0, s0, s3 3757; GFX10-NEXT: v_pk_lshlrev_b16 v0, s1, v0 3758; GFX10-NEXT: s_and_b32 s1, s4, s3 3759; GFX10-NEXT: s_lshr_b32 s3, s4, 16 3760; GFX10-NEXT: s_lshr_b32 s0, s0, s1 3761; GFX10-NEXT: s_lshr_b32 s1, s2, s3 3762; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 3763; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 3764; GFX10-NEXT: ; return to shader part epilog 3765 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3766 %cast = bitcast <2 x 
i16> %result to float 3767 ret float %cast 3768} 3769 3770; ; FIXME: this scalar <3 x i16> test is disabled (commented out); the blocking issue is not recorded here. 3771; define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) { 3772; %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) 3773; %cast = bitcast <3 x i16> %result to i48 3774; ret i48 %cast 3775; } 3776 3777; ; FIXME: this vector <3 x i16> test is disabled (commented out); the blocking issue is not recorded here. 3778; define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) { 3779; %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) 3780; %cast.result = bitcast <3 x i16> %result to <3 x half> 3781; ret <3 x half> %cast.result 3782; } 3783; amdgpu_ps call of llvm.fshr.v4i16 with all three <4 x i16> operands inreg; the result is bitcast to <2 x i32>. 3784define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) { 3785; GFX6-LABEL: s_fshr_v4i16: 3786; GFX6: ; %bb.0: 3787; GFX6-NEXT: s_mov_b32 s12, 0xffff 3788; GFX6-NEXT: s_lshl_b32 s9, s9, 16 3789; GFX6-NEXT: s_and_b32 s8, s8, s12 3790; GFX6-NEXT: s_or_b32 s8, s9, s8 3791; GFX6-NEXT: s_lshl_b32 s9, s11, 16 3792; GFX6-NEXT: s_and_b32 s10, s10, s12 3793; GFX6-NEXT: s_or_b32 s9, s9, s10 3794; GFX6-NEXT: s_bfe_u32 s10, 1, 0x100000 3795; GFX6-NEXT: s_and_b32 s11, s4, s12 3796; GFX6-NEXT: s_lshl_b32 s0, s0, s10 3797; GFX6-NEXT: s_lshr_b32 s11, s11, 15 3798; GFX6-NEXT: s_or_b32 s0, s0, s11 3799; GFX6-NEXT: s_and_b32 s11, s5, s12 3800; GFX6-NEXT: s_lshl_b32 s1, s1, s10 3801; GFX6-NEXT: s_lshr_b32 s11, s11, 15 3802; GFX6-NEXT: s_lshl_b32 s4, s4, 1 3803; GFX6-NEXT: s_xor_b32 s8, s8, -1 3804; GFX6-NEXT: s_or_b32 s1, s1, s11 3805; GFX6-NEXT: s_lshr_b32 s11, s8, 16 3806; GFX6-NEXT: s_and_b32 s13, s8, 15 3807; GFX6-NEXT: s_andn2_b32 s8, 15, s8 3808; GFX6-NEXT: s_and_b32 s4, s4, s12 3809; GFX6-NEXT: s_bfe_u32 s13, s13, 0x100000 3810; GFX6-NEXT: s_lshr_b32 s4, s4, 1 3811; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 3812; GFX6-NEXT: s_lshl_b32 s0, s0, s13 3813; GFX6-NEXT: s_lshr_b32 s4, s4, s8 3814; GFX6-NEXT: s_or_b32 s0, s0, s4 3815; GFX6-NEXT: s_and_b32 s4, s11, 15 3816; GFX6-NEXT: 
s_lshl_b32 s5, s5, 1 3817; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000 3818; GFX6-NEXT: s_andn2_b32 s8, 15, s11 3819; GFX6-NEXT: s_lshl_b32 s1, s1, s4 3820; GFX6-NEXT: s_and_b32 s4, s5, s12 3821; GFX6-NEXT: s_lshr_b32 s4, s4, 1 3822; GFX6-NEXT: s_bfe_u32 s5, s8, 0x100000 3823; GFX6-NEXT: s_lshr_b32 s4, s4, s5 3824; GFX6-NEXT: s_or_b32 s1, s1, s4 3825; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 3826; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 3827; GFX6-NEXT: s_lshl_b32 s1, s1, 16 3828; GFX6-NEXT: s_or_b32 s0, s0, s1 3829; GFX6-NEXT: s_lshl_b32 s1, s2, s10 3830; GFX6-NEXT: s_and_b32 s2, s6, s12 3831; GFX6-NEXT: s_lshr_b32 s2, s2, 15 3832; GFX6-NEXT: s_or_b32 s1, s1, s2 3833; GFX6-NEXT: s_lshl_b32 s2, s3, s10 3834; GFX6-NEXT: s_and_b32 s3, s7, s12 3835; GFX6-NEXT: s_lshr_b32 s3, s3, 15 3836; GFX6-NEXT: s_or_b32 s2, s2, s3 3837; GFX6-NEXT: s_lshl_b32 s3, s6, 1 3838; GFX6-NEXT: s_xor_b32 s5, s9, -1 3839; GFX6-NEXT: s_lshl_b32 s4, s7, 1 3840; GFX6-NEXT: s_lshr_b32 s6, s5, 16 3841; GFX6-NEXT: s_and_b32 s7, s5, 15 3842; GFX6-NEXT: s_andn2_b32 s5, 15, s5 3843; GFX6-NEXT: s_and_b32 s3, s3, s12 3844; GFX6-NEXT: s_bfe_u32 s7, s7, 0x100000 3845; GFX6-NEXT: s_lshr_b32 s3, s3, 1 3846; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000 3847; GFX6-NEXT: s_lshl_b32 s1, s1, s7 3848; GFX6-NEXT: s_lshr_b32 s3, s3, s5 3849; GFX6-NEXT: s_or_b32 s1, s1, s3 3850; GFX6-NEXT: s_and_b32 s3, s6, 15 3851; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000 3852; GFX6-NEXT: s_andn2_b32 s5, 15, s6 3853; GFX6-NEXT: s_lshl_b32 s2, s2, s3 3854; GFX6-NEXT: s_and_b32 s3, s4, s12 3855; GFX6-NEXT: s_lshr_b32 s3, s3, 1 3856; GFX6-NEXT: s_bfe_u32 s4, s5, 0x100000 3857; GFX6-NEXT: s_lshr_b32 s3, s3, s4 3858; GFX6-NEXT: s_or_b32 s2, s2, s3 3859; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 3860; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 3861; GFX6-NEXT: s_lshl_b32 s2, s2, 16 3862; GFX6-NEXT: s_or_b32 s1, s1, s2 3863; GFX6-NEXT: ; return to shader part epilog 3864; 3865; GFX8-LABEL: s_fshr_v4i16: 3866; GFX8: ; %bb.0: 3867; GFX8-NEXT: s_bfe_u32 s8, 1, 0x100000 
3868; GFX8-NEXT: s_bfe_u32 s9, s2, 0x100000 3869; GFX8-NEXT: s_bfe_u32 s10, 15, 0x100000 3870; GFX8-NEXT: s_lshr_b32 s6, s0, 16 3871; GFX8-NEXT: s_lshr_b32 s7, s2, 16 3872; GFX8-NEXT: s_lshl_b32 s0, s0, s8 3873; GFX8-NEXT: s_lshr_b32 s9, s9, s10 3874; GFX8-NEXT: s_or_b32 s0, s0, s9 3875; GFX8-NEXT: s_lshl_b32 s6, s6, s8 3876; GFX8-NEXT: s_lshr_b32 s9, s7, s10 3877; GFX8-NEXT: s_lshl_b32 s2, s2, s8 3878; GFX8-NEXT: s_xor_b32 s4, s4, -1 3879; GFX8-NEXT: s_or_b32 s6, s6, s9 3880; GFX8-NEXT: s_lshr_b32 s9, s4, 16 3881; GFX8-NEXT: s_and_b32 s11, s4, 15 3882; GFX8-NEXT: s_andn2_b32 s4, 15, s4 3883; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3884; GFX8-NEXT: s_bfe_u32 s11, s11, 0x100000 3885; GFX8-NEXT: s_lshr_b32 s2, s2, s8 3886; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 3887; GFX8-NEXT: s_lshl_b32 s0, s0, s11 3888; GFX8-NEXT: s_lshr_b32 s2, s2, s4 3889; GFX8-NEXT: s_or_b32 s0, s0, s2 3890; GFX8-NEXT: s_and_b32 s2, s9, 15 3891; GFX8-NEXT: s_lshl_b32 s7, s7, s8 3892; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3893; GFX8-NEXT: s_andn2_b32 s4, 15, s9 3894; GFX8-NEXT: s_lshl_b32 s2, s6, s2 3895; GFX8-NEXT: s_bfe_u32 s6, s7, 0x100000 3896; GFX8-NEXT: s_lshr_b32 s6, s6, s8 3897; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 3898; GFX8-NEXT: s_lshr_b32 s4, s6, s4 3899; GFX8-NEXT: s_or_b32 s2, s2, s4 3900; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3901; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 3902; GFX8-NEXT: s_lshl_b32 s2, s2, 16 3903; GFX8-NEXT: s_bfe_u32 s6, s3, 0x100000 3904; GFX8-NEXT: s_or_b32 s0, s0, s2 3905; GFX8-NEXT: s_lshr_b32 s2, s1, 16 3906; GFX8-NEXT: s_lshr_b32 s4, s3, 16 3907; GFX8-NEXT: s_lshl_b32 s1, s1, s8 3908; GFX8-NEXT: s_lshr_b32 s6, s6, s10 3909; GFX8-NEXT: s_or_b32 s1, s1, s6 3910; GFX8-NEXT: s_lshl_b32 s2, s2, s8 3911; GFX8-NEXT: s_lshr_b32 s6, s4, s10 3912; GFX8-NEXT: s_lshl_b32 s3, s3, s8 3913; GFX8-NEXT: s_xor_b32 s5, s5, -1 3914; GFX8-NEXT: s_or_b32 s2, s2, s6 3915; GFX8-NEXT: s_lshr_b32 s6, s5, 16 3916; GFX8-NEXT: s_and_b32 s7, s5, 15 3917; GFX8-NEXT: s_andn2_b32 s5, 15, s5 
3918; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 3919; GFX8-NEXT: s_bfe_u32 s7, s7, 0x100000 3920; GFX8-NEXT: s_lshr_b32 s3, s3, s8 3921; GFX8-NEXT: s_bfe_u32 s5, s5, 0x100000 3922; GFX8-NEXT: s_lshl_b32 s1, s1, s7 3923; GFX8-NEXT: s_lshr_b32 s3, s3, s5 3924; GFX8-NEXT: s_or_b32 s1, s1, s3 3925; GFX8-NEXT: s_and_b32 s3, s6, 15 3926; GFX8-NEXT: s_lshl_b32 s4, s4, s8 3927; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 3928; GFX8-NEXT: s_andn2_b32 s5, 15, s6 3929; GFX8-NEXT: s_lshl_b32 s2, s2, s3 3930; GFX8-NEXT: s_bfe_u32 s3, s4, 0x100000 3931; GFX8-NEXT: s_lshr_b32 s3, s3, s8 3932; GFX8-NEXT: s_bfe_u32 s4, s5, 0x100000 3933; GFX8-NEXT: s_lshr_b32 s3, s3, s4 3934; GFX8-NEXT: s_or_b32 s2, s2, s3 3935; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3936; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3937; GFX8-NEXT: s_lshl_b32 s2, s2, 16 3938; GFX8-NEXT: s_or_b32 s1, s1, s2 3939; GFX8-NEXT: ; return to shader part epilog 3940; 3941; GFX9-LABEL: s_fshr_v4i16: 3942; GFX9: ; %bb.0: 3943; GFX9-NEXT: s_mov_b32 s8, 0x10001 3944; GFX9-NEXT: s_lshr_b32 s9, s0, 16 3945; GFX9-NEXT: s_mov_b32 s6, 0xf000f 3946; GFX9-NEXT: s_lshl_b32 s0, s0, s8 3947; GFX9-NEXT: s_lshl_b32 s9, s9, 1 3948; GFX9-NEXT: s_and_b32 s7, s4, s6 3949; GFX9-NEXT: s_andn2_b32 s4, s6, s4 3950; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s9 3951; GFX9-NEXT: s_lshr_b32 s9, s0, 16 3952; GFX9-NEXT: s_lshr_b32 s10, s4, 16 3953; GFX9-NEXT: s_lshl_b32 s0, s0, s4 3954; GFX9-NEXT: s_lshl_b32 s4, s9, s10 3955; GFX9-NEXT: s_mov_b32 s9, 0xffff 3956; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 3957; GFX9-NEXT: s_lshr_b32 s4, s2, 16 3958; GFX9-NEXT: s_and_b32 s2, s2, s9 3959; GFX9-NEXT: s_lshr_b32 s10, s7, 16 3960; GFX9-NEXT: s_and_b32 s7, s7, s9 3961; GFX9-NEXT: s_lshr_b32 s2, s2, s7 3962; GFX9-NEXT: s_lshr_b32 s4, s4, s10 3963; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 3964; GFX9-NEXT: s_or_b32 s0, s0, s2 3965; GFX9-NEXT: s_and_b32 s2, s5, s6 3966; GFX9-NEXT: s_andn2_b32 s4, s6, s5 3967; GFX9-NEXT: s_lshr_b32 s5, s1, 16 3968; GFX9-NEXT: s_lshl_b32 s1, s1, s8 3969; 
GFX9-NEXT: s_lshl_b32 s5, s5, 1 3970; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 3971; GFX9-NEXT: s_lshr_b32 s5, s1, 16 3972; GFX9-NEXT: s_lshr_b32 s6, s4, 16 3973; GFX9-NEXT: s_lshl_b32 s1, s1, s4 3974; GFX9-NEXT: s_lshl_b32 s4, s5, s6 3975; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4 3976; GFX9-NEXT: s_lshr_b32 s4, s3, 16 3977; GFX9-NEXT: s_and_b32 s3, s3, s9 3978; GFX9-NEXT: s_lshr_b32 s5, s2, 16 3979; GFX9-NEXT: s_and_b32 s2, s2, s9 3980; GFX9-NEXT: s_lshr_b32 s2, s3, s2 3981; GFX9-NEXT: s_lshr_b32 s3, s4, s5 3982; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3 3983; GFX9-NEXT: s_or_b32 s1, s1, s2 3984; GFX9-NEXT: ; return to shader part epilog 3985; 3986; GFX10-LABEL: s_fshr_v4i16: 3987; GFX10: ; %bb.0: 3988; GFX10-NEXT: s_mov_b32 s7, 0x10001 3989; GFX10-NEXT: s_lshr_b32 s8, s0, 16 3990; GFX10-NEXT: s_mov_b32 s6, 0xf000f 3991; GFX10-NEXT: s_lshl_b32 s0, s0, s7 3992; GFX10-NEXT: s_lshl_b32 s8, s8, 1 3993; GFX10-NEXT: s_and_b32 s9, s4, s6 3994; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s8 3995; GFX10-NEXT: s_andn2_b32 s4, s6, s4 3996; GFX10-NEXT: s_lshr_b32 s8, s0, 16 3997; GFX10-NEXT: s_lshr_b32 s10, s4, 16 3998; GFX10-NEXT: s_lshl_b32 s0, s0, s4 3999; GFX10-NEXT: s_lshl_b32 s4, s8, s10 4000; GFX10-NEXT: s_mov_b32 s8, 0xffff 4001; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 4002; GFX10-NEXT: s_lshr_b32 s4, s1, 16 4003; GFX10-NEXT: s_lshl_b32 s1, s1, s7 4004; GFX10-NEXT: s_lshl_b32 s4, s4, 1 4005; GFX10-NEXT: s_and_b32 s7, s5, s6 4006; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4007; GFX10-NEXT: s_andn2_b32 s4, s6, s5 4008; GFX10-NEXT: s_lshr_b32 s5, s1, 16 4009; GFX10-NEXT: s_lshr_b32 s6, s4, 16 4010; GFX10-NEXT: s_lshr_b32 s10, s2, 16 4011; GFX10-NEXT: s_and_b32 s2, s2, s8 4012; GFX10-NEXT: s_and_b32 s11, s9, s8 4013; GFX10-NEXT: s_lshr_b32 s9, s9, 16 4014; GFX10-NEXT: s_lshl_b32 s1, s1, s4 4015; GFX10-NEXT: s_lshl_b32 s4, s5, s6 4016; GFX10-NEXT: s_lshr_b32 s5, s3, 16 4017; GFX10-NEXT: s_and_b32 s3, s3, s8 4018; GFX10-NEXT: s_and_b32 s6, s7, s8 4019; GFX10-NEXT: s_lshr_b32 s7, s7, 
16 4020; GFX10-NEXT: s_lshr_b32 s2, s2, s11 4021; GFX10-NEXT: s_lshr_b32 s9, s10, s9 4022; GFX10-NEXT: s_lshr_b32 s3, s3, s6 4023; GFX10-NEXT: s_lshr_b32 s5, s5, s7 4024; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s9 4025; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4026; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s5 4027; GFX10-NEXT: s_or_b32 s0, s0, s2 4028; GFX10-NEXT: s_or_b32 s1, s1, s3 4029; GFX10-NEXT: ; return to shader part epilog 4030 %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) 4031 %cast.result = bitcast <4 x i16> %result to <2 x i32> 4032 ret <2 x i32> %cast.result 4033} 4034 4035define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) { 4036; GFX6-LABEL: v_fshr_v4i16: 4037; GFX6: ; %bb.0: 4038; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4039; GFX6-NEXT: v_mov_b32_e32 v12, 0xffff 4040; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9 4041; GFX6-NEXT: v_and_b32_e32 v8, v8, v12 4042; GFX6-NEXT: v_or_b32_e32 v8, v9, v8 4043; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v11 4044; GFX6-NEXT: v_and_b32_e32 v10, v10, v12 4045; GFX6-NEXT: s_mov_b32 s5, 0xffff 4046; GFX6-NEXT: v_or_b32_e32 v9, v9, v10 4047; GFX6-NEXT: s_bfe_u32 s4, 1, 0x100000 4048; GFX6-NEXT: v_and_b32_e32 v10, s5, v4 4049; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0 4050; GFX6-NEXT: v_lshrrev_b32_e32 v10, 15, v10 4051; GFX6-NEXT: v_or_b32_e32 v0, v0, v10 4052; GFX6-NEXT: v_and_b32_e32 v10, s5, v5 4053; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1 4054; GFX6-NEXT: v_lshrrev_b32_e32 v10, 15, v10 4055; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 4056; GFX6-NEXT: v_or_b32_e32 v1, v1, v10 4057; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4 4058; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v8 4059; GFX6-NEXT: v_and_b32_e32 v11, 15, v8 4060; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 4061; GFX6-NEXT: v_and_b32_e32 v8, 15, v8 4062; GFX6-NEXT: v_and_b32_e32 v4, v4, v12 4063; GFX6-NEXT: v_bfe_u32 v11, v11, 0, 16 4064; GFX6-NEXT: v_lshrrev_b32_e32 v4, 1, v4 4065; GFX6-NEXT: v_bfe_u32 v8, v8, 0, 
16 4066; GFX6-NEXT: v_lshlrev_b32_e32 v0, v11, v0 4067; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4 4068; GFX6-NEXT: v_or_b32_e32 v0, v0, v4 4069; GFX6-NEXT: v_and_b32_e32 v4, 15, v10 4070; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5 4071; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10 4072; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16 4073; GFX6-NEXT: v_and_b32_e32 v8, 15, v8 4074; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1 4075; GFX6-NEXT: v_and_b32_e32 v4, v5, v12 4076; GFX6-NEXT: v_lshrrev_b32_e32 v4, 1, v4 4077; GFX6-NEXT: v_bfe_u32 v5, v8, 0, 16 4078; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 4079; GFX6-NEXT: v_or_b32_e32 v1, v1, v4 4080; GFX6-NEXT: v_and_b32_e32 v4, v6, v12 4081; GFX6-NEXT: v_lshlrev_b32_e32 v2, s4, v2 4082; GFX6-NEXT: v_lshrrev_b32_e32 v4, 15, v4 4083; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 4084; GFX6-NEXT: v_and_b32_e32 v4, v7, v12 4085; GFX6-NEXT: v_lshlrev_b32_e32 v3, s4, v3 4086; GFX6-NEXT: v_lshrrev_b32_e32 v4, 15, v4 4087; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 4088; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v6 4089; GFX6-NEXT: v_xor_b32_e32 v6, -1, v9 4090; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v7 4091; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6 4092; GFX6-NEXT: v_and_b32_e32 v8, 15, v6 4093; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6 4094; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 4095; GFX6-NEXT: v_and_b32_e32 v4, v4, v12 4096; GFX6-NEXT: v_bfe_u32 v8, v8, 0, 16 4097; GFX6-NEXT: v_lshrrev_b32_e32 v4, 1, v4 4098; GFX6-NEXT: v_bfe_u32 v6, v6, 0, 16 4099; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2 4100; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4 4101; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 4102; GFX6-NEXT: v_and_b32_e32 v4, 15, v7 4103; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7 4104; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16 4105; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 4106; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3 4107; GFX6-NEXT: v_and_b32_e32 v4, v5, v12 4108; GFX6-NEXT: v_lshrrev_b32_e32 v4, 1, v4 4109; GFX6-NEXT: v_bfe_u32 v5, v6, 0, 16 4110; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 4111; GFX6-NEXT: v_or_b32_e32 v3, v3, 
v4 4112; GFX6-NEXT: s_setpc_b64 s[30:31] 4113; 4114; GFX8-LABEL: v_fshr_v4i16: 4115; GFX8: ; %bb.0: 4116; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4117; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v0 4118; GFX8-NEXT: v_lshrrev_b16_e32 v7, 15, v2 4119; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 4120; GFX8-NEXT: v_mov_b32_e32 v7, 1 4121; GFX8-NEXT: v_mov_b32_e32 v8, 15 4122; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4123; GFX8-NEXT: v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4124; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 4125; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 4126; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v2 4127; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4128; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v4 4129; GFX8-NEXT: v_and_b32_e32 v10, 15, v4 4130; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 4131; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 4132; GFX8-NEXT: v_lshrrev_b16_e32 v9, 1, v9 4133; GFX8-NEXT: v_lshlrev_b16_e32 v6, v10, v6 4134; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v9 4135; GFX8-NEXT: v_or_b32_e32 v4, v6, v4 4136; GFX8-NEXT: v_and_b32_e32 v6, 15, v7 4137; GFX8-NEXT: v_xor_b32_e32 v7, -1, v7 4138; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 4139; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 4140; GFX8-NEXT: v_lshlrev_b16_e32 v0, v6, v0 4141; GFX8-NEXT: v_lshrrev_b16_e32 v2, v7, v2 4142; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 4143; GFX8-NEXT: v_mov_b32_e32 v2, 16 4144; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 4145; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4146; GFX8-NEXT: v_lshlrev_b16_e32 v4, 1, v1 4147; GFX8-NEXT: v_lshrrev_b16_e32 v6, 15, v3 4148; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 4149; GFX8-NEXT: v_mov_b32_e32 v6, 1 4150; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v6, v1 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4151; GFX8-NEXT: v_lshrrev_b16_sdwa v7, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4152; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 4153; GFX8-NEXT: v_or_b32_e32 v1, v1, v7 4154; GFX8-NEXT: v_lshlrev_b16_e32 v7, 1, v3 4155; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4156; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v5 4157; GFX8-NEXT: v_and_b32_e32 v8, 15, v5 4158; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 4159; GFX8-NEXT: v_and_b32_e32 v5, 15, v5 4160; GFX8-NEXT: v_lshrrev_b16_e32 v7, 1, v7 4161; GFX8-NEXT: v_lshlrev_b16_e32 v4, v8, v4 4162; GFX8-NEXT: v_lshrrev_b16_e32 v5, v5, v7 4163; GFX8-NEXT: v_or_b32_e32 v4, v4, v5 4164; GFX8-NEXT: v_and_b32_e32 v5, 15, v6 4165; GFX8-NEXT: v_xor_b32_e32 v6, -1, v6 4166; GFX8-NEXT: v_and_b32_e32 v6, 15, v6 4167; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3 4168; GFX8-NEXT: v_lshlrev_b16_e32 v1, v5, v1 4169; GFX8-NEXT: v_lshrrev_b16_e32 v3, v6, v3 4170; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 4171; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 4172; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4173; GFX8-NEXT: s_setpc_b64 s[30:31] 4174; 4175; GFX9-LABEL: v_fshr_v4i16: 4176; GFX9: ; %bb.0: 4177; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4178; GFX9-NEXT: s_mov_b32 s4, 0xf000f 4179; GFX9-NEXT: v_and_b32_e32 v6, s4, v4 4180; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4 4181; GFX9-NEXT: v_and_b32_e32 v4, s4, v4 4182; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4183; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0 4184; GFX9-NEXT: v_pk_lshrrev_b16 v2, v6, v2 4185; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5 4186; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 4187; GFX9-NEXT: v_and_b32_e32 v2, s4, v5 4188; GFX9-NEXT: v_and_b32_e32 v4, s4, v4 4189; GFX9-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] 4190; GFX9-NEXT: 
v_pk_lshlrev_b16 v1, v4, v1 4191; GFX9-NEXT: v_pk_lshrrev_b16 v2, v2, v3 4192; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 4193; GFX9-NEXT: s_setpc_b64 s[30:31] 4194; 4195; GFX10-LABEL: v_fshr_v4i16: 4196; GFX10: ; %bb.0: 4197; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4198; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4199; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 4200; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 4201; GFX10-NEXT: s_mov_b32 s4, 0xf000f 4202; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4203; GFX10-NEXT: v_and_b32_e32 v4, s4, v4 4204; GFX10-NEXT: v_and_b32_e32 v6, s4, v6 4205; GFX10-NEXT: v_and_b32_e32 v5, s4, v5 4206; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 4207; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] 4208; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2 4209; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0 4210; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3 4211; GFX10-NEXT: v_pk_lshlrev_b16 v1, v7, v1 4212; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 4213; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 4214; GFX10-NEXT: s_setpc_b64 s[30:31] 4215 %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) 4216 %cast.result = bitcast <4 x i16> %result to <4 x half> 4217 ret <4 x half> %cast.result 4218} 4219 4220define amdgpu_ps i64 @s_fshr_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) { 4221; GFX6-LABEL: s_fshr_i64: 4222; GFX6: ; %bb.0: 4223; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 63 4224; GFX6-NEXT: s_andn2_b64 s[4:5], 63, s[4:5] 4225; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4226; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 4227; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s6 4228; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4229; GFX6-NEXT: ; return to shader part epilog 4230; 4231; GFX8-LABEL: s_fshr_i64: 4232; GFX8: ; %bb.0: 4233; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 63 4234; GFX8-NEXT: s_andn2_b64 s[4:5], 63, s[4:5] 4235; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4236; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 4237; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], 
s6 4238; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4239; GFX8-NEXT: ; return to shader part epilog 4240; 4241; GFX9-LABEL: s_fshr_i64: 4242; GFX9: ; %bb.0: 4243; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 63 4244; GFX9-NEXT: s_andn2_b64 s[4:5], 63, s[4:5] 4245; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4246; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 4247; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s6 4248; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4249; GFX9-NEXT: ; return to shader part epilog 4250; 4251; GFX10-LABEL: s_fshr_i64: 4252; GFX10: ; %bb.0: 4253; GFX10-NEXT: s_andn2_b64 s[6:7], 63, s[4:5] 4254; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4255; GFX10-NEXT: s_and_b64 s[4:5], s[4:5], 63 4256; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s6 4257; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s4 4258; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4259; GFX10-NEXT: ; return to shader part epilog 4260 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 4261 ret i64 %result 4262} 4263 4264define amdgpu_ps i64 @s_fshr_i64_5(i64 inreg %lhs, i64 inreg %rhs) { 4265; GCN-LABEL: s_fshr_i64_5: 4266; GCN: ; %bb.0: 4267; GCN-NEXT: s_lshl_b32 s1, s0, 27 4268; GCN-NEXT: s_mov_b32 s0, 0 4269; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], 5 4270; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4271; GCN-NEXT: ; return to shader part epilog 4272 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5) 4273 ret i64 %result 4274} 4275 4276define amdgpu_ps i64 @s_fshr_i64_32(i64 inreg %lhs, i64 inreg %rhs) { 4277; GCN-LABEL: s_fshr_i64_32: 4278; GCN: ; %bb.0: 4279; GCN-NEXT: s_mov_b32 s1, s0 4280; GCN-NEXT: s_mov_b32 s0, 0 4281; GCN-NEXT: s_mov_b32 s2, s3 4282; GCN-NEXT: s_mov_b32 s3, s0 4283; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4284; GCN-NEXT: ; return to shader part epilog 4285 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32) 4286 ret i64 %result 4287} 4288 4289define amdgpu_ps i64 @s_fshr_i64_48(i64 inreg %lhs, i64 inreg %rhs) { 4290; GCN-LABEL: s_fshr_i64_48: 4291; GCN: ; %bb.0: 
4292; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 16 4293; GCN-NEXT: s_lshr_b32 s2, s3, 16 4294; GCN-NEXT: s_mov_b32 s3, 0 4295; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4296; GCN-NEXT: ; return to shader part epilog 4297 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48) 4298 ret i64 %result 4299} 4300 4301define i64 @v_fshr_i64(i64 %lhs, i64 %rhs, i64 %amt) { 4302; GFX6-LABEL: v_fshr_i64: 4303; GFX6: ; %bb.0: 4304; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4305; GFX6-NEXT: v_and_b32_e32 v5, 63, v4 4306; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 4307; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 4308; GFX6-NEXT: v_and_b32_e32 v4, 63, v4 4309; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v4 4310; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v5 4311; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 4312; GFX6-NEXT: v_or_b32_e32 v1, v1, v3 4313; GFX6-NEXT: s_setpc_b64 s[30:31] 4314; 4315; GFX8-LABEL: v_fshr_i64: 4316; GFX8: ; %bb.0: 4317; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4318; GFX8-NEXT: v_and_b32_e32 v5, 63, v4 4319; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 4320; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4321; GFX8-NEXT: v_and_b32_e32 v4, 63, v4 4322; GFX8-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] 4323; GFX8-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3] 4324; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 4325; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 4326; GFX8-NEXT: s_setpc_b64 s[30:31] 4327; 4328; GFX9-LABEL: v_fshr_i64: 4329; GFX9: ; %bb.0: 4330; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4331; GFX9-NEXT: v_and_b32_e32 v5, 63, v4 4332; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4 4333; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4334; GFX9-NEXT: v_and_b32_e32 v4, 63, v4 4335; GFX9-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] 4336; GFX9-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3] 4337; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 4338; GFX9-NEXT: v_or_b32_e32 v1, v1, v3 4339; GFX9-NEXT: s_setpc_b64 s[30:31] 4340; 4341; GFX10-LABEL: v_fshr_i64: 4342; GFX10: ; %bb.0: 4343; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
4344; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4345; GFX10-NEXT: v_xor_b32_e32 v5, -1, v4 4346; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4347; GFX10-NEXT: v_and_b32_e32 v4, 63, v4 4348; GFX10-NEXT: v_and_b32_e32 v5, 63, v5 4349; GFX10-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3] 4350; GFX10-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1] 4351; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 4352; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 4353; GFX10-NEXT: s_setpc_b64 s[30:31] 4354 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 4355 ret i64 %result 4356} 4357 4358define i64 @v_fshr_i64_5(i64 %lhs, i64 %rhs) { 4359; GFX6-LABEL: v_fshr_i64_5: 4360; GFX6: ; %bb.0: 4361; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4362; GFX6-NEXT: v_mov_b32_e32 v4, v0 4363; GFX6-NEXT: v_lshr_b64 v[0:1], v[2:3], 5 4364; GFX6-NEXT: v_lshlrev_b32_e32 v2, 27, v4 4365; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 4366; GFX6-NEXT: s_setpc_b64 s[30:31] 4367; 4368; GFX8-LABEL: v_fshr_i64_5: 4369; GFX8: ; %bb.0: 4370; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4371; GFX8-NEXT: v_mov_b32_e32 v4, v0 4372; GFX8-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 4373; GFX8-NEXT: v_lshlrev_b32_e32 v2, 27, v4 4374; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 4375; GFX8-NEXT: s_setpc_b64 s[30:31] 4376; 4377; GFX9-LABEL: v_fshr_i64_5: 4378; GFX9: ; %bb.0: 4379; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4380; GFX9-NEXT: v_mov_b32_e32 v4, v0 4381; GFX9-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 4382; GFX9-NEXT: v_lshlrev_b32_e32 v2, 27, v4 4383; GFX9-NEXT: v_or_b32_e32 v1, v2, v1 4384; GFX9-NEXT: s_setpc_b64 s[30:31] 4385; 4386; GFX10-LABEL: v_fshr_i64_5: 4387; GFX10: ; %bb.0: 4388; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4389; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4390; GFX10-NEXT: v_mov_b32_e32 v4, v0 4391; GFX10-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 4392; GFX10-NEXT: v_lshlrev_b32_e32 v2, 27, v4 4393; GFX10-NEXT: v_or_b32_e32 v1, v2, v1 4394; GFX10-NEXT: s_setpc_b64 s[30:31] 4395 %result = call i64 @llvm.fshr.i64(i64 
%lhs, i64 %rhs, i64 5) 4396 ret i64 %result 4397} 4398 4399define i64 @v_fshr_i64_32(i64 %lhs, i64 %rhs) { 4400; GFX6-LABEL: v_fshr_i64_32: 4401; GFX6: ; %bb.0: 4402; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4403; GFX6-NEXT: v_mov_b32_e32 v1, v0 4404; GFX6-NEXT: v_mov_b32_e32 v0, v3 4405; GFX6-NEXT: s_setpc_b64 s[30:31] 4406; 4407; GFX8-LABEL: v_fshr_i64_32: 4408; GFX8: ; %bb.0: 4409; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4410; GFX8-NEXT: v_mov_b32_e32 v1, v0 4411; GFX8-NEXT: v_mov_b32_e32 v0, v3 4412; GFX8-NEXT: s_setpc_b64 s[30:31] 4413; 4414; GFX9-LABEL: v_fshr_i64_32: 4415; GFX9: ; %bb.0: 4416; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4417; GFX9-NEXT: v_mov_b32_e32 v1, v0 4418; GFX9-NEXT: v_mov_b32_e32 v0, v3 4419; GFX9-NEXT: s_setpc_b64 s[30:31] 4420; 4421; GFX10-LABEL: v_fshr_i64_32: 4422; GFX10: ; %bb.0: 4423; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4424; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4425; GFX10-NEXT: v_mov_b32_e32 v1, v0 4426; GFX10-NEXT: v_mov_b32_e32 v0, v3 4427; GFX10-NEXT: s_setpc_b64 s[30:31] 4428 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32) 4429 ret i64 %result 4430} 4431 4432define i64 @v_fshr_i64_48(i64 %lhs, i64 %rhs) { 4433; GFX6-LABEL: v_fshr_i64_48: 4434; GFX6: ; %bb.0: 4435; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4436; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 16 4437; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v3 4438; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 4439; GFX6-NEXT: s_setpc_b64 s[30:31] 4440; 4441; GFX8-LABEL: v_fshr_i64_48: 4442; GFX8: ; %bb.0: 4443; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4444; GFX8-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 4445; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4446; GFX8-NEXT: s_setpc_b64 s[30:31] 4447; 4448; GFX9-LABEL: v_fshr_i64_48: 4449; GFX9: ; %bb.0: 4450; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4451; GFX9-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 4452; 
GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4453; GFX9-NEXT: s_setpc_b64 s[30:31] 4454; 4455; GFX10-LABEL: v_fshr_i64_48: 4456; GFX10: ; %bb.0: 4457; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4458; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4459; GFX10-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 4460; GFX10-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4461; GFX10-NEXT: s_setpc_b64 s[30:31] 4462 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48) 4463 ret i64 %result 4464} 4465 4466define amdgpu_ps <2 x float> @v_fshr_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) { 4467; GFX6-LABEL: v_fshr_i64_ssv: 4468; GFX6: ; %bb.0: 4469; GFX6-NEXT: v_and_b32_e32 v2, 63, v0 4470; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 4471; GFX6-NEXT: v_and_b32_e32 v0, 63, v0 4472; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4473; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v0 4474; GFX6-NEXT: v_lshr_b64 v[2:3], s[2:3], v2 4475; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 4476; GFX6-NEXT: v_or_b32_e32 v1, v1, v3 4477; GFX6-NEXT: ; return to shader part epilog 4478; 4479; GFX8-LABEL: v_fshr_i64_ssv: 4480; GFX8: ; %bb.0: 4481; GFX8-NEXT: v_and_b32_e32 v2, 63, v0 4482; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 4483; GFX8-NEXT: v_and_b32_e32 v0, 63, v0 4484; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4485; GFX8-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1] 4486; GFX8-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3] 4487; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 4488; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 4489; GFX8-NEXT: ; return to shader part epilog 4490; 4491; GFX9-LABEL: v_fshr_i64_ssv: 4492; GFX9: ; %bb.0: 4493; GFX9-NEXT: v_and_b32_e32 v2, 63, v0 4494; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 4495; GFX9-NEXT: v_and_b32_e32 v0, 63, v0 4496; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4497; GFX9-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1] 4498; GFX9-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3] 4499; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 4500; 
GFX9-NEXT: v_or_b32_e32 v1, v1, v3 4501; GFX9-NEXT: ; return to shader part epilog 4502; 4503; GFX10-LABEL: v_fshr_i64_ssv: 4504; GFX10: ; %bb.0: 4505; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 4506; GFX10-NEXT: v_and_b32_e32 v0, 63, v0 4507; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4508; GFX10-NEXT: v_and_b32_e32 v2, 63, v1 4509; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[2:3] 4510; GFX10-NEXT: v_lshlrev_b64 v[2:3], v2, s[0:1] 4511; GFX10-NEXT: v_or_b32_e32 v0, v2, v0 4512; GFX10-NEXT: v_or_b32_e32 v1, v3, v1 4513; GFX10-NEXT: ; return to shader part epilog 4514 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 4515 %cast = bitcast i64 %result to <2 x float> 4516 ret <2 x float> %cast 4517} 4518 4519define amdgpu_ps <2 x float> @v_fshr_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) { 4520; GFX6-LABEL: v_fshr_i64_svs: 4521; GFX6: ; %bb.0: 4522; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63 4523; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4524; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4525; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], s4 4526; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 4527; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 4528; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 4529; GFX6-NEXT: ; return to shader part epilog 4530; 4531; GFX8-LABEL: v_fshr_i64_svs: 4532; GFX8: ; %bb.0: 4533; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63 4534; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4535; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4536; GFX8-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 4537; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 4538; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 4539; GFX8-NEXT: v_or_b32_e32 v1, s1, v1 4540; GFX8-NEXT: ; return to shader part epilog 4541; 4542; GFX9-LABEL: v_fshr_i64_svs: 4543; GFX9: ; %bb.0: 4544; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63 4545; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4546; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4547; GFX9-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 4548; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 4549; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 4550; 
GFX9-NEXT: v_or_b32_e32 v1, s1, v1 4551; GFX9-NEXT: ; return to shader part epilog 4552; 4553; GFX10-LABEL: v_fshr_i64_svs: 4554; GFX10: ; %bb.0: 4555; GFX10-NEXT: s_and_b64 s[4:5], s[2:3], 63 4556; GFX10-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4557; GFX10-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 4558; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4559; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 4560; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 4561; GFX10-NEXT: v_or_b32_e32 v1, s1, v1 4562; GFX10-NEXT: ; return to shader part epilog 4563 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 4564 %cast = bitcast i64 %result to <2 x float> 4565 ret <2 x float> %cast 4566} 4567 4568define amdgpu_ps <2 x float> @v_fshr_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) { 4569; GFX6-LABEL: v_fshr_i64_vss: 4570; GFX6: ; %bb.0: 4571; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 4572; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63 4573; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4574; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s2 4575; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 4576; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 4577; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 4578; GFX6-NEXT: ; return to shader part epilog 4579; 4580; GFX8-LABEL: v_fshr_i64_vss: 4581; GFX8: ; %bb.0: 4582; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4583; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63 4584; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4585; GFX8-NEXT: v_lshlrev_b64 v[0:1], s2, v[0:1] 4586; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 4587; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 4588; GFX8-NEXT: v_or_b32_e32 v1, s1, v1 4589; GFX8-NEXT: ; return to shader part epilog 4590; 4591; GFX9-LABEL: v_fshr_i64_vss: 4592; GFX9: ; %bb.0: 4593; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4594; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63 4595; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4596; GFX9-NEXT: v_lshlrev_b64 v[0:1], s2, v[0:1] 4597; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 4598; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 4599; GFX9-NEXT: v_or_b32_e32 v1, s1, 
v1 4600; GFX9-NEXT: ; return to shader part epilog 4601; 4602; GFX10-LABEL: v_fshr_i64_vss: 4603; GFX10: ; %bb.0: 4604; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4605; GFX10-NEXT: s_andn2_b64 s[4:5], 63, s[2:3] 4606; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], 63 4607; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 4608; GFX10-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1] 4609; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 4610; GFX10-NEXT: v_or_b32_e32 v1, s1, v1 4611; GFX10-NEXT: ; return to shader part epilog 4612 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 4613 %cast = bitcast i64 %result to <2 x float> 4614 ret <2 x float> %cast 4615} 4616 4617define amdgpu_ps <2 x i64> @s_fshr_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) { 4618; GFX6-LABEL: s_fshr_v2i64: 4619; GFX6: ; %bb.0: 4620; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], 63 4621; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[8:9] 4622; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4623; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 4624; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 4625; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 4626; GFX6-NEXT: s_and_b64 s[4:5], s[10:11], 63 4627; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] 4628; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4629; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 4630; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], s4 4631; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 4632; GFX6-NEXT: ; return to shader part epilog 4633; 4634; GFX8-LABEL: s_fshr_v2i64: 4635; GFX8: ; %bb.0: 4636; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], 63 4637; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[8:9] 4638; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4639; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 4640; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 4641; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 4642; GFX8-NEXT: s_and_b64 s[4:5], s[10:11], 63 4643; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] 4644; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4645; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 4646; GFX8-NEXT: s_lshr_b64 
s[4:5], s[6:7], s4 4647; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 4648; GFX8-NEXT: ; return to shader part epilog 4649; 4650; GFX9-LABEL: s_fshr_v2i64: 4651; GFX9: ; %bb.0: 4652; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], 63 4653; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[8:9] 4654; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4655; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 4656; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 4657; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 4658; GFX9-NEXT: s_and_b64 s[4:5], s[10:11], 63 4659; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] 4660; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4661; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 4662; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], s4 4663; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 4664; GFX9-NEXT: ; return to shader part epilog 4665; 4666; GFX10-LABEL: s_fshr_v2i64: 4667; GFX10: ; %bb.0: 4668; GFX10-NEXT: s_andn2_b64 s[12:13], 63, s[8:9] 4669; GFX10-NEXT: s_and_b64 s[8:9], s[8:9], 63 4670; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4671; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s8 4672; GFX10-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] 4673; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4674; GFX10-NEXT: s_and_b64 s[10:11], s[10:11], 63 4675; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 4676; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 4677; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s10 4678; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 4679; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] 4680; GFX10-NEXT: ; return to shader part epilog 4681 %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) 4682 ret <2 x i64> %result 4683} 4684 4685define <2 x i64> @v_fshr_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) { 4686; GFX6-LABEL: v_fshr_v2i64: 4687; GFX6: ; %bb.0: 4688; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4689; GFX6-NEXT: v_and_b32_e32 v9, 63, v8 4690; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 4691; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 4692; GFX6-NEXT: v_and_b32_e32 v8, 63, v8 4693; 
GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v8 4694; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], v9 4695; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10 4696; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 4697; GFX6-NEXT: v_or_b32_e32 v0, v0, v4 4698; GFX6-NEXT: v_and_b32_e32 v4, 63, v10 4699; GFX6-NEXT: v_and_b32_e32 v8, 63, v8 4700; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], v8 4701; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], v4 4702; GFX6-NEXT: v_or_b32_e32 v1, v1, v5 4703; GFX6-NEXT: v_or_b32_e32 v2, v2, v6 4704; GFX6-NEXT: v_or_b32_e32 v3, v3, v7 4705; GFX6-NEXT: s_setpc_b64 s[30:31] 4706; 4707; GFX8-LABEL: v_fshr_v2i64: 4708; GFX8: ; %bb.0: 4709; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4710; GFX8-NEXT: v_and_b32_e32 v9, 63, v8 4711; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8 4712; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4713; GFX8-NEXT: v_and_b32_e32 v8, 63, v8 4714; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1] 4715; GFX8-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5] 4716; GFX8-NEXT: v_xor_b32_e32 v8, -1, v10 4717; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 4718; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 4719; GFX8-NEXT: v_and_b32_e32 v4, 63, v10 4720; GFX8-NEXT: v_and_b32_e32 v8, 63, v8 4721; GFX8-NEXT: v_lshlrev_b64 v[2:3], v8, v[2:3] 4722; GFX8-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7] 4723; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 4724; GFX8-NEXT: v_or_b32_e32 v2, v2, v6 4725; GFX8-NEXT: v_or_b32_e32 v3, v3, v7 4726; GFX8-NEXT: s_setpc_b64 s[30:31] 4727; 4728; GFX9-LABEL: v_fshr_v2i64: 4729; GFX9: ; %bb.0: 4730; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4731; GFX9-NEXT: v_and_b32_e32 v9, 63, v8 4732; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8 4733; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4734; GFX9-NEXT: v_and_b32_e32 v8, 63, v8 4735; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1] 4736; GFX9-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5] 4737; GFX9-NEXT: v_xor_b32_e32 v8, -1, v10 4738; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 4739; GFX9-NEXT: v_or_b32_e32 v0, v0, v4 4740; GFX9-NEXT: v_and_b32_e32 v4, 63, v10 
4741; GFX9-NEXT: v_and_b32_e32 v8, 63, v8 4742; GFX9-NEXT: v_lshlrev_b64 v[2:3], v8, v[2:3] 4743; GFX9-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7] 4744; GFX9-NEXT: v_or_b32_e32 v1, v1, v5 4745; GFX9-NEXT: v_or_b32_e32 v2, v2, v6 4746; GFX9-NEXT: v_or_b32_e32 v3, v3, v7 4747; GFX9-NEXT: s_setpc_b64 s[30:31] 4748; 4749; GFX10-LABEL: v_fshr_v2i64: 4750; GFX10: ; %bb.0: 4751; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4752; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4753; GFX10-NEXT: v_xor_b32_e32 v9, -1, v8 4754; GFX10-NEXT: v_xor_b32_e32 v11, -1, v10 4755; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4756; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 4757; GFX10-NEXT: v_and_b32_e32 v8, 63, v8 4758; GFX10-NEXT: v_and_b32_e32 v9, 63, v9 4759; GFX10-NEXT: v_and_b32_e32 v11, 63, v11 4760; GFX10-NEXT: v_and_b32_e32 v10, 63, v10 4761; GFX10-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5] 4762; GFX10-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1] 4763; GFX10-NEXT: v_lshlrev_b64 v[2:3], v11, v[2:3] 4764; GFX10-NEXT: v_lshrrev_b64 v[6:7], v10, v[6:7] 4765; GFX10-NEXT: v_or_b32_e32 v0, v0, v4 4766; GFX10-NEXT: v_or_b32_e32 v1, v1, v5 4767; GFX10-NEXT: v_or_b32_e32 v2, v2, v6 4768; GFX10-NEXT: v_or_b32_e32 v3, v3, v7 4769; GFX10-NEXT: s_setpc_b64 s[30:31] 4770 %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) 4771 ret <2 x i64> %result 4772} 4773 4774define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) { 4775; GFX6-LABEL: s_fshr_i128: 4776; GFX6: ; %bb.0: 4777; GFX6-NEXT: s_movk_i32 s10, 0x7f 4778; GFX6-NEXT: s_mov_b32 s11, 0 4779; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] 4780; GFX6-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] 4781; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4782; GFX6-NEXT: s_lshr_b32 s10, s1, 31 4783; GFX6-NEXT: s_lshl_b64 s[14:15], s[0:1], 1 4784; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[10:11] 4785; GFX6-NEXT: s_sub_i32 s13, s8, 64 4786; GFX6-NEXT: s_sub_i32 s9, 64, s8 4787; GFX6-NEXT: s_cmp_lt_u32 s8, 64 
4788; GFX6-NEXT: s_cselect_b32 s16, 1, 0 4789; GFX6-NEXT: s_cmp_eq_u32 s8, 0 4790; GFX6-NEXT: s_cselect_b32 s17, 1, 0 4791; GFX6-NEXT: s_lshl_b64 s[2:3], s[14:15], s8 4792; GFX6-NEXT: s_lshr_b64 s[10:11], s[14:15], s9 4793; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 4794; GFX6-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] 4795; GFX6-NEXT: s_lshl_b64 s[10:11], s[14:15], s13 4796; GFX6-NEXT: s_cmp_lg_u32 s16, 0 4797; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 4798; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 4799; GFX6-NEXT: s_cmp_lg_u32 s17, 0 4800; GFX6-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] 4801; GFX6-NEXT: s_sub_i32 s14, s12, 64 4802; GFX6-NEXT: s_sub_i32 s13, 64, s12 4803; GFX6-NEXT: s_cmp_lt_u32 s12, 64 4804; GFX6-NEXT: s_cselect_b32 s15, 1, 0 4805; GFX6-NEXT: s_cmp_eq_u32 s12, 0 4806; GFX6-NEXT: s_cselect_b32 s16, 1, 0 4807; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 4808; GFX6-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 4809; GFX6-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 4810; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] 4811; GFX6-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 4812; GFX6-NEXT: s_cmp_lg_u32 s15, 0 4813; GFX6-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7] 4814; GFX6-NEXT: s_cmp_lg_u32 s16, 0 4815; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 4816; GFX6-NEXT: s_cmp_lg_u32 s15, 0 4817; GFX6-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 4818; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 4819; GFX6-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] 4820; GFX6-NEXT: ; return to shader part epilog 4821; 4822; GFX8-LABEL: s_fshr_i128: 4823; GFX8: ; %bb.0: 4824; GFX8-NEXT: s_movk_i32 s10, 0x7f 4825; GFX8-NEXT: s_mov_b32 s11, 0 4826; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] 4827; GFX8-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] 4828; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4829; GFX8-NEXT: s_lshr_b32 s10, s1, 31 4830; GFX8-NEXT: s_lshl_b64 s[14:15], s[0:1], 1 4831; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[10:11] 4832; GFX8-NEXT: s_sub_i32 s13, s8, 64 4833; GFX8-NEXT: s_sub_i32 s9, 
64, s8 4834; GFX8-NEXT: s_cmp_lt_u32 s8, 64 4835; GFX8-NEXT: s_cselect_b32 s16, 1, 0 4836; GFX8-NEXT: s_cmp_eq_u32 s8, 0 4837; GFX8-NEXT: s_cselect_b32 s17, 1, 0 4838; GFX8-NEXT: s_lshl_b64 s[2:3], s[14:15], s8 4839; GFX8-NEXT: s_lshr_b64 s[10:11], s[14:15], s9 4840; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 4841; GFX8-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] 4842; GFX8-NEXT: s_lshl_b64 s[10:11], s[14:15], s13 4843; GFX8-NEXT: s_cmp_lg_u32 s16, 0 4844; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 4845; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 4846; GFX8-NEXT: s_cmp_lg_u32 s17, 0 4847; GFX8-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] 4848; GFX8-NEXT: s_sub_i32 s14, s12, 64 4849; GFX8-NEXT: s_sub_i32 s13, 64, s12 4850; GFX8-NEXT: s_cmp_lt_u32 s12, 64 4851; GFX8-NEXT: s_cselect_b32 s15, 1, 0 4852; GFX8-NEXT: s_cmp_eq_u32 s12, 0 4853; GFX8-NEXT: s_cselect_b32 s16, 1, 0 4854; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 4855; GFX8-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 4856; GFX8-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 4857; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] 4858; GFX8-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 4859; GFX8-NEXT: s_cmp_lg_u32 s15, 0 4860; GFX8-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7] 4861; GFX8-NEXT: s_cmp_lg_u32 s16, 0 4862; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 4863; GFX8-NEXT: s_cmp_lg_u32 s15, 0 4864; GFX8-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 4865; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 4866; GFX8-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] 4867; GFX8-NEXT: ; return to shader part epilog 4868; 4869; GFX9-LABEL: s_fshr_i128: 4870; GFX9: ; %bb.0: 4871; GFX9-NEXT: s_movk_i32 s10, 0x7f 4872; GFX9-NEXT: s_mov_b32 s11, 0 4873; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] 4874; GFX9-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] 4875; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4876; GFX9-NEXT: s_lshr_b32 s10, s1, 31 4877; GFX9-NEXT: s_lshl_b64 s[14:15], s[0:1], 1 4878; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[10:11] 4879; GFX9-NEXT: s_sub_i32 
s13, s8, 64 4880; GFX9-NEXT: s_sub_i32 s9, 64, s8 4881; GFX9-NEXT: s_cmp_lt_u32 s8, 64 4882; GFX9-NEXT: s_cselect_b32 s16, 1, 0 4883; GFX9-NEXT: s_cmp_eq_u32 s8, 0 4884; GFX9-NEXT: s_cselect_b32 s17, 1, 0 4885; GFX9-NEXT: s_lshl_b64 s[2:3], s[14:15], s8 4886; GFX9-NEXT: s_lshr_b64 s[10:11], s[14:15], s9 4887; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 4888; GFX9-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] 4889; GFX9-NEXT: s_lshl_b64 s[10:11], s[14:15], s13 4890; GFX9-NEXT: s_cmp_lg_u32 s16, 0 4891; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 4892; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 4893; GFX9-NEXT: s_cmp_lg_u32 s17, 0 4894; GFX9-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] 4895; GFX9-NEXT: s_sub_i32 s14, s12, 64 4896; GFX9-NEXT: s_sub_i32 s13, 64, s12 4897; GFX9-NEXT: s_cmp_lt_u32 s12, 64 4898; GFX9-NEXT: s_cselect_b32 s15, 1, 0 4899; GFX9-NEXT: s_cmp_eq_u32 s12, 0 4900; GFX9-NEXT: s_cselect_b32 s16, 1, 0 4901; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 4902; GFX9-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 4903; GFX9-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 4904; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] 4905; GFX9-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 4906; GFX9-NEXT: s_cmp_lg_u32 s15, 0 4907; GFX9-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7] 4908; GFX9-NEXT: s_cmp_lg_u32 s16, 0 4909; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 4910; GFX9-NEXT: s_cmp_lg_u32 s15, 0 4911; GFX9-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 4912; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 4913; GFX9-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] 4914; GFX9-NEXT: ; return to shader part epilog 4915; 4916; GFX10-LABEL: s_fshr_i128: 4917; GFX10: ; %bb.0: 4918; GFX10-NEXT: s_movk_i32 s10, 0x7f 4919; GFX10-NEXT: s_mov_b32 s11, 0 4920; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4921; GFX10-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] 4922; GFX10-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] 4923; GFX10-NEXT: s_lshr_b32 s10, s1, 31 4924; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4925; GFX10-NEXT: s_or_b64 
s[2:3], s[2:3], s[10:11] 4926; GFX10-NEXT: s_sub_i32 s13, s8, 64 4927; GFX10-NEXT: s_sub_i32 s9, 64, s8 4928; GFX10-NEXT: s_cmp_lt_u32 s8, 64 4929; GFX10-NEXT: s_cselect_b32 s16, 1, 0 4930; GFX10-NEXT: s_cmp_eq_u32 s8, 0 4931; GFX10-NEXT: s_cselect_b32 s17, 1, 0 4932; GFX10-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 4933; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], s8 4934; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 4935; GFX10-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15] 4936; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s13 4937; GFX10-NEXT: s_cmp_lg_u32 s16, 0 4938; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 4939; GFX10-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] 4940; GFX10-NEXT: s_cmp_lg_u32 s17, 0 4941; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 4942; GFX10-NEXT: s_sub_i32 s14, s12, 64 4943; GFX10-NEXT: s_sub_i32 s10, 64, s12 4944; GFX10-NEXT: s_cmp_lt_u32 s12, 64 4945; GFX10-NEXT: s_cselect_b32 s15, 1, 0 4946; GFX10-NEXT: s_cmp_eq_u32 s12, 0 4947; GFX10-NEXT: s_cselect_b32 s16, 1, 0 4948; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], s12 4949; GFX10-NEXT: s_lshl_b64 s[10:11], s[6:7], s10 4950; GFX10-NEXT: s_lshr_b64 s[12:13], s[6:7], s12 4951; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] 4952; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 4953; GFX10-NEXT: s_cmp_lg_u32 s15, 0 4954; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7] 4955; GFX10-NEXT: s_cmp_lg_u32 s16, 0 4956; GFX10-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] 4957; GFX10-NEXT: s_cmp_lg_u32 s15, 0 4958; GFX10-NEXT: s_cselect_b64 s[4:5], s[12:13], 0 4959; GFX10-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] 4960; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 4961; GFX10-NEXT: ; return to shader part epilog 4962 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 4963 ret i128 %result 4964} 4965 4966define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) { 4967; GFX6-LABEL: v_fshr_i128: 4968; GFX6: ; %bb.0: 4969; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4970; GFX6-NEXT: s_movk_i32 s4, 0x7f 4971; 
GFX6-NEXT: v_and_b32_e32 v14, s4, v8 4972; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 4973; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 4974; GFX6-NEXT: v_and_b32_e32 v15, s4, v8 4975; GFX6-NEXT: v_lshl_b64 v[8:9], v[0:1], 1 4976; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1 4977; GFX6-NEXT: v_or_b32_e32 v2, v2, v0 4978; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v15 4979; GFX6-NEXT: v_lshr_b64 v[0:1], v[8:9], v0 4980; GFX6-NEXT: v_lshl_b64 v[10:11], v[2:3], v15 4981; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v15 4982; GFX6-NEXT: v_lshl_b64 v[12:13], v[8:9], v15 4983; GFX6-NEXT: v_or_b32_e32 v10, v0, v10 4984; GFX6-NEXT: v_or_b32_e32 v11, v1, v11 4985; GFX6-NEXT: v_lshl_b64 v[0:1], v[8:9], v16 4986; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 4987; GFX6-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc 4988; GFX6-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc 4989; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 4990; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 4991; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 4992; GFX6-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc 4993; GFX6-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc 4994; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v14 4995; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], v14 4996; GFX6-NEXT: v_lshl_b64 v[2:3], v[6:7], v2 4997; GFX6-NEXT: v_subrev_i32_e32 v15, vcc, 64, v14 4998; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 4999; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 5000; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], v15 5001; GFX6-NEXT: v_lshr_b64 v[8:9], v[6:7], v14 5002; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 5003; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5004; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5005; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 5006; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] 5007; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] 5008; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 5009; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 5010; GFX6-NEXT: v_or_b32_e32 v0, v12, v0 5011; GFX6-NEXT: v_or_b32_e32 v1, v13, v1 5012; GFX6-NEXT: v_or_b32_e32 v2, v10, v2 5013; 
GFX6-NEXT: v_or_b32_e32 v3, v11, v3 5014; GFX6-NEXT: s_setpc_b64 s[30:31] 5015; 5016; GFX8-LABEL: v_fshr_i128: 5017; GFX8: ; %bb.0: 5018; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5019; GFX8-NEXT: s_movk_i32 s4, 0x7f 5020; GFX8-NEXT: v_and_b32_e32 v14, s4, v8 5021; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8 5022; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5023; GFX8-NEXT: v_and_b32_e32 v15, s4, v8 5024; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1] 5025; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1 5026; GFX8-NEXT: v_or_b32_e32 v2, v2, v0 5027; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v15 5028; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[8:9] 5029; GFX8-NEXT: v_lshlrev_b64 v[10:11], v15, v[2:3] 5030; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v15 5031; GFX8-NEXT: v_lshlrev_b64 v[12:13], v15, v[8:9] 5032; GFX8-NEXT: v_or_b32_e32 v10, v0, v10 5033; GFX8-NEXT: v_or_b32_e32 v11, v1, v11 5034; GFX8-NEXT: v_lshlrev_b64 v[0:1], v16, v[8:9] 5035; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 5036; GFX8-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc 5037; GFX8-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc 5038; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 5039; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 5040; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 5041; GFX8-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc 5042; GFX8-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc 5043; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v14 5044; GFX8-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5] 5045; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7] 5046; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, 64, v14 5047; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 5048; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 5049; GFX8-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7] 5050; GFX8-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7] 5051; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 5052; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5053; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5054; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 5055; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] 5056; GFX8-NEXT: 
v_cndmask_b32_e64 v1, v1, v5, s[4:5] 5057; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 5058; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 5059; GFX8-NEXT: v_or_b32_e32 v0, v12, v0 5060; GFX8-NEXT: v_or_b32_e32 v1, v13, v1 5061; GFX8-NEXT: v_or_b32_e32 v2, v10, v2 5062; GFX8-NEXT: v_or_b32_e32 v3, v11, v3 5063; GFX8-NEXT: s_setpc_b64 s[30:31] 5064; 5065; GFX9-LABEL: v_fshr_i128: 5066; GFX9: ; %bb.0: 5067; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5068; GFX9-NEXT: s_movk_i32 s4, 0x7f 5069; GFX9-NEXT: v_and_b32_e32 v14, s4, v8 5070; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8 5071; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5072; GFX9-NEXT: v_and_b32_e32 v15, s4, v8 5073; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1] 5074; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1 5075; GFX9-NEXT: v_or_b32_e32 v2, v2, v0 5076; GFX9-NEXT: v_sub_u32_e32 v0, 64, v15 5077; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[8:9] 5078; GFX9-NEXT: v_lshlrev_b64 v[10:11], v15, v[2:3] 5079; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v15 5080; GFX9-NEXT: v_lshlrev_b64 v[12:13], v15, v[8:9] 5081; GFX9-NEXT: v_or_b32_e32 v10, v0, v10 5082; GFX9-NEXT: v_or_b32_e32 v11, v1, v11 5083; GFX9-NEXT: v_lshlrev_b64 v[0:1], v16, v[8:9] 5084; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 5085; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc 5086; GFX9-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc 5087; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 5088; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 5089; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 5090; GFX9-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc 5091; GFX9-NEXT: v_sub_u32_e32 v2, 64, v14 5092; GFX9-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc 5093; GFX9-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5] 5094; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7] 5095; GFX9-NEXT: v_subrev_u32_e32 v15, 64, v14 5096; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 5097; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 5098; GFX9-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7] 5099; GFX9-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7] 5100; GFX9-NEXT: 
v_cmp_gt_u32_e32 vcc, 64, v14 5101; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5102; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5103; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 5104; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] 5105; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] 5106; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 5107; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 5108; GFX9-NEXT: v_or_b32_e32 v0, v12, v0 5109; GFX9-NEXT: v_or_b32_e32 v1, v13, v1 5110; GFX9-NEXT: v_or_b32_e32 v2, v10, v2 5111; GFX9-NEXT: v_or_b32_e32 v3, v11, v3 5112; GFX9-NEXT: s_setpc_b64 s[30:31] 5113; 5114; GFX10-LABEL: v_fshr_i128: 5115; GFX10: ; %bb.0: 5116; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5117; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5118; GFX10-NEXT: v_xor_b32_e32 v9, -1, v8 5119; GFX10-NEXT: s_movk_i32 s4, 0x7f 5120; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5121; GFX10-NEXT: v_lshrrev_b32_e32 v10, 31, v1 5122; GFX10-NEXT: v_and_b32_e32 v19, s4, v8 5123; GFX10-NEXT: v_and_b32_e32 v18, s4, v9 5124; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5125; GFX10-NEXT: v_or_b32_e32 v2, v2, v10 5126; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19 5127; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v18 5128; GFX10-NEXT: v_subrev_nc_u32_e32 v21, 64, v18 5129; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v19 5130; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3] 5131; GFX10-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5] 5132; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1] 5133; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7] 5134; GFX10-NEXT: v_lshlrev_b64 v[14:15], v18, v[0:1] 5135; GFX10-NEXT: v_lshlrev_b64 v[0:1], v21, v[0:1] 5136; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 5137; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19 5138; GFX10-NEXT: v_or_b32_e32 v10, v10, v8 5139; GFX10-NEXT: v_or_b32_e32 v11, v11, v9 5140; GFX10-NEXT: v_lshrrev_b64 v[8:9], v20, v[6:7] 5141; GFX10-NEXT: v_or_b32_e32 v12, v12, v16 5142; GFX10-NEXT: v_or_b32_e32 v13, v13, v17 5143; GFX10-NEXT: v_cndmask_b32_e32 
v10, v0, v10, vcc_lo 5144; GFX10-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo 5145; GFX10-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7] 5146; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v12, s4 5147; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v19 5148; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v18 5149; GFX10-NEXT: v_cndmask_b32_e64 v6, v9, v13, s4 5150; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc_lo 5151; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo 5152; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v4, s5 5153; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s6 5154; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s6 5155; GFX10-NEXT: v_cndmask_b32_e64 v5, v6, v5, s5 5156; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v0, s4 5157; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v1, s4 5158; GFX10-NEXT: v_or_b32_e32 v0, v14, v4 5159; GFX10-NEXT: v_or_b32_e32 v1, v7, v5 5160; GFX10-NEXT: v_or_b32_e32 v2, v2, v6 5161; GFX10-NEXT: v_or_b32_e32 v3, v3, v8 5162; GFX10-NEXT: s_setpc_b64 s[30:31] 5163 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 5164 ret i128 %result 5165} 5166 5167define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) { 5168; GFX6-LABEL: v_fshr_i128_ssv: 5169; GFX6: ; %bb.0: 5170; GFX6-NEXT: s_movk_i32 s8, 0x7f 5171; GFX6-NEXT: v_and_b32_e32 v6, s8, v0 5172; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 5173; GFX6-NEXT: s_mov_b32 s9, 0 5174; GFX6-NEXT: v_and_b32_e32 v7, s8, v0 5175; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5176; GFX6-NEXT: s_lshr_b32 s8, s1, 31 5177; GFX6-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5178; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 5179; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v7 5180; GFX6-NEXT: v_lshr_b64 v[0:1], s[10:11], v0 5181; GFX6-NEXT: v_lshl_b64 v[2:3], s[0:1], v7 5182; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v7 5183; GFX6-NEXT: v_lshl_b64 v[4:5], s[10:11], v7 5184; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 5185; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 5186; GFX6-NEXT: v_lshl_b64 v[0:1], s[10:11], v8 5187; GFX6-NEXT: v_cmp_gt_u32_e32 
vcc, 64, v7 5188; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc 5189; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc 5190; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5191; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5192; GFX6-NEXT: v_mov_b32_e32 v2, s0 5193; GFX6-NEXT: v_mov_b32_e32 v3, s1 5194; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 5195; GFX6-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc 5196; GFX6-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc 5197; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v6 5198; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v6 5199; GFX6-NEXT: v_lshl_b64 v[2:3], s[6:7], v2 5200; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 64, v6 5201; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 5202; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 5203; GFX6-NEXT: v_lshr_b64 v[0:1], s[6:7], v11 5204; GFX6-NEXT: v_lshr_b64 v[4:5], s[6:7], v6 5205; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 5206; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5207; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5208; GFX6-NEXT: v_mov_b32_e32 v2, s4 5209; GFX6-NEXT: v_mov_b32_e32 v3, s5 5210; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 5211; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 5212; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 5213; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 5214; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 5215; GFX6-NEXT: v_or_b32_e32 v0, v8, v0 5216; GFX6-NEXT: v_or_b32_e32 v1, v9, v1 5217; GFX6-NEXT: v_or_b32_e32 v2, v7, v2 5218; GFX6-NEXT: v_or_b32_e32 v3, v10, v3 5219; GFX6-NEXT: ; return to shader part epilog 5220; 5221; GFX8-LABEL: v_fshr_i128_ssv: 5222; GFX8: ; %bb.0: 5223; GFX8-NEXT: s_movk_i32 s8, 0x7f 5224; GFX8-NEXT: v_and_b32_e32 v6, s8, v0 5225; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 5226; GFX8-NEXT: s_mov_b32 s9, 0 5227; GFX8-NEXT: v_and_b32_e32 v7, s8, v0 5228; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5229; GFX8-NEXT: s_lshr_b32 s8, s1, 31 5230; GFX8-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5231; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 5232; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v7 5233; 
GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[10:11] 5234; GFX8-NEXT: v_lshlrev_b64 v[2:3], v7, s[0:1] 5235; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v7 5236; GFX8-NEXT: v_lshlrev_b64 v[4:5], v7, s[10:11] 5237; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 5238; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 5239; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, s[10:11] 5240; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 5241; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc 5242; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc 5243; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5244; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5245; GFX8-NEXT: v_mov_b32_e32 v2, s0 5246; GFX8-NEXT: v_mov_b32_e32 v3, s1 5247; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 5248; GFX8-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc 5249; GFX8-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc 5250; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v6 5251; GFX8-NEXT: v_lshrrev_b64 v[0:1], v6, s[4:5] 5252; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7] 5253; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 64, v6 5254; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 5255; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 5256; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7] 5257; GFX8-NEXT: v_lshrrev_b64 v[4:5], v6, s[6:7] 5258; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 5259; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5260; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5261; GFX8-NEXT: v_mov_b32_e32 v2, s4 5262; GFX8-NEXT: v_mov_b32_e32 v3, s5 5263; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 5264; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 5265; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 5266; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 5267; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 5268; GFX8-NEXT: v_or_b32_e32 v0, v8, v0 5269; GFX8-NEXT: v_or_b32_e32 v1, v9, v1 5270; GFX8-NEXT: v_or_b32_e32 v2, v7, v2 5271; GFX8-NEXT: v_or_b32_e32 v3, v10, v3 5272; GFX8-NEXT: ; return to shader part epilog 5273; 5274; GFX9-LABEL: v_fshr_i128_ssv: 5275; GFX9: ; %bb.0: 5276; GFX9-NEXT: s_movk_i32 s8, 0x7f 5277; GFX9-NEXT: 
v_and_b32_e32 v6, s8, v0 5278; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 5279; GFX9-NEXT: s_mov_b32 s9, 0 5280; GFX9-NEXT: v_and_b32_e32 v7, s8, v0 5281; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5282; GFX9-NEXT: s_lshr_b32 s8, s1, 31 5283; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5284; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 5285; GFX9-NEXT: v_sub_u32_e32 v0, 64, v7 5286; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[10:11] 5287; GFX9-NEXT: v_lshlrev_b64 v[2:3], v7, s[0:1] 5288; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v7 5289; GFX9-NEXT: v_lshlrev_b64 v[4:5], v7, s[10:11] 5290; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 5291; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 5292; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, s[10:11] 5293; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 5294; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc 5295; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc 5296; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5297; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5298; GFX9-NEXT: v_mov_b32_e32 v2, s0 5299; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 5300; GFX9-NEXT: v_mov_b32_e32 v3, s1 5301; GFX9-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc 5302; GFX9-NEXT: v_sub_u32_e32 v2, 64, v6 5303; GFX9-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc 5304; GFX9-NEXT: v_lshrrev_b64 v[0:1], v6, s[4:5] 5305; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7] 5306; GFX9-NEXT: v_subrev_u32_e32 v11, 64, v6 5307; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 5308; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 5309; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7] 5310; GFX9-NEXT: v_lshrrev_b64 v[4:5], v6, s[6:7] 5311; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 5312; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5313; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5314; GFX9-NEXT: v_mov_b32_e32 v2, s4 5315; GFX9-NEXT: v_mov_b32_e32 v3, s5 5316; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 5317; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 5318; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 5319; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 5320; GFX9-NEXT: 
v_cndmask_b32_e32 v3, 0, v5, vcc 5321; GFX9-NEXT: v_or_b32_e32 v0, v8, v0 5322; GFX9-NEXT: v_or_b32_e32 v1, v9, v1 5323; GFX9-NEXT: v_or_b32_e32 v2, v7, v2 5324; GFX9-NEXT: v_or_b32_e32 v3, v10, v3 5325; GFX9-NEXT: ; return to shader part epilog 5326; 5327; GFX10-LABEL: v_fshr_i128_ssv: 5328; GFX10: ; %bb.0: 5329; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 5330; GFX10-NEXT: s_movk_i32 s10, 0x7f 5331; GFX10-NEXT: s_mov_b32 s9, 0 5332; GFX10-NEXT: v_and_b32_e32 v13, s10, v0 5333; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5334; GFX10-NEXT: v_and_b32_e32 v12, s10, v1 5335; GFX10-NEXT: s_lshr_b32 s8, s1, 31 5336; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5337; GFX10-NEXT: v_sub_nc_u32_e32 v8, 64, v13 5338; GFX10-NEXT: s_or_b64 s[8:9], s[2:3], s[8:9] 5339; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v12 5340; GFX10-NEXT: v_lshlrev_b64 v[0:1], v12, s[8:9] 5341; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v12 5342; GFX10-NEXT: v_subrev_nc_u32_e32 v14, 64, v13 5343; GFX10-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5] 5344; GFX10-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1] 5345; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7] 5346; GFX10-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1] 5347; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12 5348; GFX10-NEXT: v_lshlrev_b64 v[6:7], v12, s[0:1] 5349; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 64, v13 5350; GFX10-NEXT: v_or_b32_e32 v2, v2, v0 5351; GFX10-NEXT: v_or_b32_e32 v3, v3, v1 5352; GFX10-NEXT: v_lshrrev_b64 v[0:1], v14, s[6:7] 5353; GFX10-NEXT: v_or_b32_e32 v4, v4, v8 5354; GFX10-NEXT: v_or_b32_e32 v5, v5, v9 5355; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo 5356; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo 5357; GFX10-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7] 5358; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 5359; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 0, v13 5360; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v12 5361; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0 5362; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo 5363; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc_lo 5364; 
GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, s1 5365; GFX10-NEXT: v_cndmask_b32_e64 v5, v8, s8, s2 5366; GFX10-NEXT: v_cndmask_b32_e64 v7, v10, s9, s2 5367; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, s1 5368; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 5369; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 5370; GFX10-NEXT: v_or_b32_e32 v0, v6, v0 5371; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 5372; GFX10-NEXT: v_or_b32_e32 v2, v5, v2 5373; GFX10-NEXT: v_or_b32_e32 v3, v7, v3 5374; GFX10-NEXT: ; return to shader part epilog 5375 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 5376 %cast.result = bitcast i128 %result to <4 x float> 5377 ret <4 x float> %cast.result 5378} 5379 5380define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) { 5381; GFX6-LABEL: v_fshr_i128_svs: 5382; GFX6: ; %bb.0: 5383; GFX6-NEXT: s_movk_i32 s6, 0x7f 5384; GFX6-NEXT: s_mov_b32 s7, 0 5385; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5386; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5387; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5388; GFX6-NEXT: s_lshr_b32 s6, s1, 31 5389; GFX6-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5390; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] 5391; GFX6-NEXT: s_sub_i32 s9, s4, 64 5392; GFX6-NEXT: s_sub_i32 s5, 64, s4 5393; GFX6-NEXT: s_cmp_lt_u32 s4, 64 5394; GFX6-NEXT: s_cselect_b32 s12, 1, 0 5395; GFX6-NEXT: s_cmp_eq_u32 s4, 0 5396; GFX6-NEXT: s_cselect_b32 s13, 1, 0 5397; GFX6-NEXT: s_lshl_b64 s[2:3], s[10:11], s4 5398; GFX6-NEXT: s_lshr_b64 s[6:7], s[10:11], s5 5399; GFX6-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 5400; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 5401; GFX6-NEXT: s_lshl_b64 s[6:7], s[10:11], s9 5402; GFX6-NEXT: s_cmp_lg_u32 s12, 0 5403; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5404; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 5405; GFX6-NEXT: s_cmp_lg_u32 s13, 0 5406; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 5407; GFX6-NEXT: s_sub_i32 s4, s8, 64 5408; GFX6-NEXT: s_sub_i32 s5, 64, s8 5409; GFX6-NEXT: 
s_cmp_lt_u32 s8, 64 5410; GFX6-NEXT: s_cselect_b32 s6, 1, 0 5411; GFX6-NEXT: s_cmp_eq_u32 s8, 0 5412; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s8 5413; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s5 5414; GFX6-NEXT: s_cselect_b32 s7, 1, 0 5415; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s8 5416; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s4 5417; GFX6-NEXT: s_and_b32 s4, 1, s6 5418; GFX6-NEXT: v_or_b32_e32 v4, v4, v6 5419; GFX6-NEXT: v_or_b32_e32 v5, v5, v7 5420; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5421; GFX6-NEXT: s_and_b32 s4, 1, s7 5422; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 5423; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 5424; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5425; GFX6-NEXT: s_and_b32 s4, 1, s6 5426; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 5427; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 5428; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5429; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 5430; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 5431; GFX6-NEXT: v_or_b32_e32 v0, s2, v0 5432; GFX6-NEXT: v_or_b32_e32 v1, s3, v1 5433; GFX6-NEXT: v_or_b32_e32 v2, s0, v2 5434; GFX6-NEXT: v_or_b32_e32 v3, s1, v3 5435; GFX6-NEXT: ; return to shader part epilog 5436; 5437; GFX8-LABEL: v_fshr_i128_svs: 5438; GFX8: ; %bb.0: 5439; GFX8-NEXT: s_movk_i32 s6, 0x7f 5440; GFX8-NEXT: s_mov_b32 s7, 0 5441; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5442; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5443; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5444; GFX8-NEXT: s_lshr_b32 s6, s1, 31 5445; GFX8-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5446; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] 5447; GFX8-NEXT: s_sub_i32 s9, s4, 64 5448; GFX8-NEXT: s_sub_i32 s5, 64, s4 5449; GFX8-NEXT: s_cmp_lt_u32 s4, 64 5450; GFX8-NEXT: s_cselect_b32 s12, 1, 0 5451; GFX8-NEXT: s_cmp_eq_u32 s4, 0 5452; GFX8-NEXT: s_cselect_b32 s13, 1, 0 5453; GFX8-NEXT: s_lshl_b64 s[2:3], s[10:11], s4 5454; GFX8-NEXT: s_lshr_b64 s[6:7], s[10:11], s5 5455; GFX8-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 5456; GFX8-NEXT: s_or_b64 s[4:5], 
s[6:7], s[4:5] 5457; GFX8-NEXT: s_lshl_b64 s[6:7], s[10:11], s9 5458; GFX8-NEXT: s_cmp_lg_u32 s12, 0 5459; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5460; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 5461; GFX8-NEXT: s_cmp_lg_u32 s13, 0 5462; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 5463; GFX8-NEXT: s_sub_i32 s4, s8, 64 5464; GFX8-NEXT: s_sub_i32 s5, 64, s8 5465; GFX8-NEXT: s_cmp_lt_u32 s8, 64 5466; GFX8-NEXT: s_cselect_b32 s6, 1, 0 5467; GFX8-NEXT: s_cmp_eq_u32 s8, 0 5468; GFX8-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] 5469; GFX8-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 5470; GFX8-NEXT: s_cselect_b32 s7, 1, 0 5471; GFX8-NEXT: v_lshrrev_b64 v[8:9], s8, v[2:3] 5472; GFX8-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] 5473; GFX8-NEXT: s_and_b32 s4, 1, s6 5474; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 5475; GFX8-NEXT: v_or_b32_e32 v5, v5, v7 5476; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5477; GFX8-NEXT: s_and_b32 s4, 1, s7 5478; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 5479; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 5480; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5481; GFX8-NEXT: s_and_b32 s4, 1, s6 5482; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 5483; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 5484; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5485; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 5486; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 5487; GFX8-NEXT: v_or_b32_e32 v0, s2, v0 5488; GFX8-NEXT: v_or_b32_e32 v1, s3, v1 5489; GFX8-NEXT: v_or_b32_e32 v2, s0, v2 5490; GFX8-NEXT: v_or_b32_e32 v3, s1, v3 5491; GFX8-NEXT: ; return to shader part epilog 5492; 5493; GFX9-LABEL: v_fshr_i128_svs: 5494; GFX9: ; %bb.0: 5495; GFX9-NEXT: s_movk_i32 s6, 0x7f 5496; GFX9-NEXT: s_mov_b32 s7, 0 5497; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5498; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5499; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5500; GFX9-NEXT: s_lshr_b32 s6, s1, 31 5501; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5502; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] 5503; 
GFX9-NEXT: s_sub_i32 s9, s4, 64 5504; GFX9-NEXT: s_sub_i32 s5, 64, s4 5505; GFX9-NEXT: s_cmp_lt_u32 s4, 64 5506; GFX9-NEXT: s_cselect_b32 s12, 1, 0 5507; GFX9-NEXT: s_cmp_eq_u32 s4, 0 5508; GFX9-NEXT: s_cselect_b32 s13, 1, 0 5509; GFX9-NEXT: s_lshl_b64 s[2:3], s[10:11], s4 5510; GFX9-NEXT: s_lshr_b64 s[6:7], s[10:11], s5 5511; GFX9-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 5512; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 5513; GFX9-NEXT: s_lshl_b64 s[6:7], s[10:11], s9 5514; GFX9-NEXT: s_cmp_lg_u32 s12, 0 5515; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5516; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 5517; GFX9-NEXT: s_cmp_lg_u32 s13, 0 5518; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 5519; GFX9-NEXT: s_sub_i32 s4, s8, 64 5520; GFX9-NEXT: s_sub_i32 s5, 64, s8 5521; GFX9-NEXT: s_cmp_lt_u32 s8, 64 5522; GFX9-NEXT: s_cselect_b32 s6, 1, 0 5523; GFX9-NEXT: s_cmp_eq_u32 s8, 0 5524; GFX9-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] 5525; GFX9-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 5526; GFX9-NEXT: s_cselect_b32 s7, 1, 0 5527; GFX9-NEXT: v_lshrrev_b64 v[8:9], s8, v[2:3] 5528; GFX9-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] 5529; GFX9-NEXT: s_and_b32 s4, 1, s6 5530; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 5531; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 5532; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5533; GFX9-NEXT: s_and_b32 s4, 1, s7 5534; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 5535; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 5536; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5537; GFX9-NEXT: s_and_b32 s4, 1, s6 5538; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 5539; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 5540; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5541; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 5542; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 5543; GFX9-NEXT: v_or_b32_e32 v0, s2, v0 5544; GFX9-NEXT: v_or_b32_e32 v1, s3, v1 5545; GFX9-NEXT: v_or_b32_e32 v2, s0, v2 5546; GFX9-NEXT: v_or_b32_e32 v3, s1, v3 5547; GFX9-NEXT: ; return to shader part epilog 5548; 5549; GFX10-LABEL: 
v_fshr_i128_svs: 5550; GFX10: ; %bb.0: 5551; GFX10-NEXT: s_movk_i32 s6, 0x7f 5552; GFX10-NEXT: s_mov_b32 s7, 0 5553; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5554; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5555; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5556; GFX10-NEXT: s_lshr_b32 s6, s1, 31 5557; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5558; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] 5559; GFX10-NEXT: s_sub_i32 s9, s4, 64 5560; GFX10-NEXT: s_sub_i32 s5, 64, s4 5561; GFX10-NEXT: s_cmp_lt_u32 s4, 64 5562; GFX10-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] 5563; GFX10-NEXT: s_cselect_b32 s12, 1, 0 5564; GFX10-NEXT: s_cmp_eq_u32 s4, 0 5565; GFX10-NEXT: s_cselect_b32 s13, 1, 0 5566; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s5 5567; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], s4 5568; GFX10-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 5569; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] 5570; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 5571; GFX10-NEXT: s_cmp_lg_u32 s12, 0 5572; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], 0 5573; GFX10-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] 5574; GFX10-NEXT: s_cmp_lg_u32 s13, 0 5575; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 5576; GFX10-NEXT: s_sub_i32 s0, 64, s8 5577; GFX10-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3] 5578; GFX10-NEXT: s_sub_i32 s0, s8, 64 5579; GFX10-NEXT: s_cmp_lt_u32 s8, 64 5580; GFX10-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] 5581; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 5582; GFX10-NEXT: s_cmp_eq_u32 s8, 0 5583; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 5584; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 5585; GFX10-NEXT: s_cselect_b32 s0, 1, 0 5586; GFX10-NEXT: s_and_b32 s1, 1, vcc_lo 5587; GFX10-NEXT: s_and_b32 s0, 1, s0 5588; GFX10-NEXT: v_lshrrev_b64 v[2:3], s8, v[2:3] 5589; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo 5590; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo 5591; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 5592; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 5593; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo 5594; 
GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo 5595; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 5596; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 5597; GFX10-NEXT: v_or_b32_e32 v0, s4, v0 5598; GFX10-NEXT: v_or_b32_e32 v1, s5, v1 5599; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 5600; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 5601; GFX10-NEXT: ; return to shader part epilog 5602 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 5603 %cast.result = bitcast i128 %result to <4 x float> 5604 ret <4 x float> %cast.result 5605} 5606 5607define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) { 5608; GFX6-LABEL: v_fshr_i128_vss: 5609; GFX6: ; %bb.0: 5610; GFX6-NEXT: s_mov_b64 s[6:7], 0x7f 5611; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5612; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5613; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 5614; GFX6-NEXT: s_sub_i32 s5, s4, 64 5615; GFX6-NEXT: s_sub_i32 s6, 64, s4 5616; GFX6-NEXT: v_lshl_b64 v[4:5], v[0:1], 1 5617; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1 5618; GFX6-NEXT: s_cmp_lt_u32 s4, 64 5619; GFX6-NEXT: v_or_b32_e32 v2, v2, v0 5620; GFX6-NEXT: s_cselect_b32 s7, 1, 0 5621; GFX6-NEXT: s_cmp_eq_u32 s4, 0 5622; GFX6-NEXT: s_cselect_b32 s9, 1, 0 5623; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], s6 5624; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s4 5625; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], s4 5626; GFX6-NEXT: s_and_b32 s4, 1, s7 5627; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5628; GFX6-NEXT: s_and_b32 s4, 1, s9 5629; GFX6-NEXT: s_sub_i32 s10, s8, 64 5630; GFX6-NEXT: s_sub_i32 s9, 64, s8 5631; GFX6-NEXT: v_or_b32_e32 v6, v0, v6 5632; GFX6-NEXT: v_or_b32_e32 v7, v1, v7 5633; GFX6-NEXT: v_lshl_b64 v[0:1], v[4:5], s5 5634; GFX6-NEXT: s_cmp_lt_u32 s8, 64 5635; GFX6-NEXT: s_cselect_b32 s11, 1, 0 5636; GFX6-NEXT: s_cmp_eq_u32 s8, 0 5637; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc 5638; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc 5639; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 5640; GFX6-NEXT: 
v_cndmask_b32_e32 v1, v1, v7, vcc 5641; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5642; GFX6-NEXT: s_cselect_b32 s12, 1, 0 5643; GFX6-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 5644; GFX6-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 5645; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 5646; GFX6-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 5647; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 5648; GFX6-NEXT: s_cmp_lg_u32 s11, 0 5649; GFX6-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] 5650; GFX6-NEXT: s_cmp_lg_u32 s12, 0 5651; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 5652; GFX6-NEXT: s_cmp_lg_u32 s11, 0 5653; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 5654; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 5655; GFX6-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 5656; GFX6-NEXT: v_or_b32_e32 v0, s0, v4 5657; GFX6-NEXT: v_or_b32_e32 v1, s1, v5 5658; GFX6-NEXT: v_or_b32_e32 v2, s2, v2 5659; GFX6-NEXT: v_or_b32_e32 v3, s3, v3 5660; GFX6-NEXT: ; return to shader part epilog 5661; 5662; GFX8-LABEL: v_fshr_i128_vss: 5663; GFX8: ; %bb.0: 5664; GFX8-NEXT: s_mov_b64 s[6:7], 0x7f 5665; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5666; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5667; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5668; GFX8-NEXT: s_sub_i32 s5, s4, 64 5669; GFX8-NEXT: s_sub_i32 s6, 64, s4 5670; GFX8-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1] 5671; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1 5672; GFX8-NEXT: s_cmp_lt_u32 s4, 64 5673; GFX8-NEXT: v_or_b32_e32 v2, v2, v0 5674; GFX8-NEXT: s_cselect_b32 s7, 1, 0 5675; GFX8-NEXT: s_cmp_eq_u32 s4, 0 5676; GFX8-NEXT: s_cselect_b32 s9, 1, 0 5677; GFX8-NEXT: v_lshrrev_b64 v[0:1], s6, v[4:5] 5678; GFX8-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] 5679; GFX8-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5] 5680; GFX8-NEXT: s_and_b32 s4, 1, s7 5681; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5682; GFX8-NEXT: s_and_b32 s4, 1, s9 5683; GFX8-NEXT: s_sub_i32 s10, s8, 64 5684; GFX8-NEXT: s_sub_i32 s9, 64, s8 5685; GFX8-NEXT: v_or_b32_e32 v6, v0, v6 5686; GFX8-NEXT: v_or_b32_e32 v7, v1, v7 5687; 
GFX8-NEXT: v_lshlrev_b64 v[0:1], s5, v[4:5] 5688; GFX8-NEXT: s_cmp_lt_u32 s8, 64 5689; GFX8-NEXT: s_cselect_b32 s11, 1, 0 5690; GFX8-NEXT: s_cmp_eq_u32 s8, 0 5691; GFX8-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc 5692; GFX8-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc 5693; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 5694; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 5695; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5696; GFX8-NEXT: s_cselect_b32 s12, 1, 0 5697; GFX8-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 5698; GFX8-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 5699; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 5700; GFX8-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 5701; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 5702; GFX8-NEXT: s_cmp_lg_u32 s11, 0 5703; GFX8-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] 5704; GFX8-NEXT: s_cmp_lg_u32 s12, 0 5705; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 5706; GFX8-NEXT: s_cmp_lg_u32 s11, 0 5707; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 5708; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 5709; GFX8-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 5710; GFX8-NEXT: v_or_b32_e32 v0, s0, v4 5711; GFX8-NEXT: v_or_b32_e32 v1, s1, v5 5712; GFX8-NEXT: v_or_b32_e32 v2, s2, v2 5713; GFX8-NEXT: v_or_b32_e32 v3, s3, v3 5714; GFX8-NEXT: ; return to shader part epilog 5715; 5716; GFX9-LABEL: v_fshr_i128_vss: 5717; GFX9: ; %bb.0: 5718; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f 5719; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5720; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5721; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5722; GFX9-NEXT: s_sub_i32 s5, s4, 64 5723; GFX9-NEXT: s_sub_i32 s6, 64, s4 5724; GFX9-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1] 5725; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1 5726; GFX9-NEXT: s_cmp_lt_u32 s4, 64 5727; GFX9-NEXT: v_or_b32_e32 v2, v2, v0 5728; GFX9-NEXT: s_cselect_b32 s7, 1, 0 5729; GFX9-NEXT: s_cmp_eq_u32 s4, 0 5730; GFX9-NEXT: s_cselect_b32 s9, 1, 0 5731; GFX9-NEXT: v_lshrrev_b64 v[0:1], s6, v[4:5] 5732; GFX9-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] 5733; 
GFX9-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5] 5734; GFX9-NEXT: s_and_b32 s4, 1, s7 5735; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5736; GFX9-NEXT: s_and_b32 s4, 1, s9 5737; GFX9-NEXT: s_sub_i32 s10, s8, 64 5738; GFX9-NEXT: s_sub_i32 s9, 64, s8 5739; GFX9-NEXT: v_or_b32_e32 v6, v0, v6 5740; GFX9-NEXT: v_or_b32_e32 v7, v1, v7 5741; GFX9-NEXT: v_lshlrev_b64 v[0:1], s5, v[4:5] 5742; GFX9-NEXT: s_cmp_lt_u32 s8, 64 5743; GFX9-NEXT: s_cselect_b32 s11, 1, 0 5744; GFX9-NEXT: s_cmp_eq_u32 s8, 0 5745; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc 5746; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc 5747; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 5748; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 5749; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5750; GFX9-NEXT: s_cselect_b32 s12, 1, 0 5751; GFX9-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 5752; GFX9-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 5753; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 5754; GFX9-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 5755; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 5756; GFX9-NEXT: s_cmp_lg_u32 s11, 0 5757; GFX9-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] 5758; GFX9-NEXT: s_cmp_lg_u32 s12, 0 5759; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 5760; GFX9-NEXT: s_cmp_lg_u32 s11, 0 5761; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 5762; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 5763; GFX9-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 5764; GFX9-NEXT: v_or_b32_e32 v0, s0, v4 5765; GFX9-NEXT: v_or_b32_e32 v1, s1, v5 5766; GFX9-NEXT: v_or_b32_e32 v2, s2, v2 5767; GFX9-NEXT: v_or_b32_e32 v3, s3, v3 5768; GFX9-NEXT: ; return to shader part epilog 5769; 5770; GFX10-LABEL: v_fshr_i128_vss: 5771; GFX10: ; %bb.0: 5772; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5773; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v1 5774; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f 5775; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5776; GFX10-NEXT: s_andn2_b64 s[8:9], s[6:7], s[4:5] 5777; GFX10-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7] 5778; GFX10-NEXT: v_or_b32_e32 v2, v2, v4 5779; 
GFX10-NEXT: s_sub_i32 s4, 64, s8 5780; GFX10-NEXT: s_sub_i32 s5, s8, 64 5781; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1] 5782; GFX10-NEXT: s_cmp_lt_u32 s8, 64 5783; GFX10-NEXT: v_lshlrev_b64 v[6:7], s8, v[2:3] 5784; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0 5785; GFX10-NEXT: s_cmp_eq_u32 s8, 0 5786; GFX10-NEXT: v_lshlrev_b64 v[8:9], s8, v[0:1] 5787; GFX10-NEXT: s_cselect_b32 s7, 1, 0 5788; GFX10-NEXT: s_and_b32 s4, 1, vcc_lo 5789; GFX10-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] 5790; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 5791; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 5792; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s4 5793; GFX10-NEXT: s_sub_i32 s10, s6, 64 5794; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo 5795; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo 5796; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s4 5797; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s4 5798; GFX10-NEXT: s_and_b32 s4, 1, s7 5799; GFX10-NEXT: s_sub_i32 s7, 64, s6 5800; GFX10-NEXT: s_cmp_lt_u32 s6, 64 5801; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 5802; GFX10-NEXT: s_cselect_b32 s11, 1, 0 5803; GFX10-NEXT: s_cmp_eq_u32 s6, 0 5804; GFX10-NEXT: s_cselect_b32 s12, 1, 0 5805; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s6 5806; GFX10-NEXT: s_lshl_b64 s[8:9], s[2:3], s7 5807; GFX10-NEXT: s_lshr_b64 s[6:7], s[2:3], s6 5808; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] 5809; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 5810; GFX10-NEXT: s_cmp_lg_u32 s11, 0 5811; GFX10-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo 5812; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] 5813; GFX10-NEXT: s_cmp_lg_u32 s12, 0 5814; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo 5815; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 5816; GFX10-NEXT: s_cmp_lg_u32 s11, 0 5817; GFX10-NEXT: v_or_b32_e32 v0, s0, v6 5818; GFX10-NEXT: s_cselect_b64 s[2:3], s[6:7], 0 5819; GFX10-NEXT: v_or_b32_e32 v1, s1, v7 5820; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 5821; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 5822; GFX10-NEXT: ; return to shader part epilog 5823 %result 
= call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 5824 %cast.result = bitcast i128 %result to <4 x float> 5825 ret <4 x float> %cast.result 5826} 5827 5828define amdgpu_ps i128 @s_fshr_i128_65(i128 inreg %lhs, i128 inreg %rhs) { 5829; GFX6-LABEL: s_fshr_i128_65: 5830; GFX6: ; %bb.0: 5831; GFX6-NEXT: s_mov_b32 s4, 0 5832; GFX6-NEXT: s_lshl_b32 s5, s0, 31 5833; GFX6-NEXT: s_lshl_b32 s3, s2, 31 5834; GFX6-NEXT: s_mov_b32 s2, s4 5835; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 5836; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1] 5837; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], 1 5838; GFX6-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 5839; GFX6-NEXT: ; return to shader part epilog 5840; 5841; GFX8-LABEL: s_fshr_i128_65: 5842; GFX8: ; %bb.0: 5843; GFX8-NEXT: s_mov_b32 s4, 0 5844; GFX8-NEXT: s_lshl_b32 s5, s0, 31 5845; GFX8-NEXT: s_lshl_b32 s3, s2, 31 5846; GFX8-NEXT: s_mov_b32 s2, s4 5847; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 5848; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1] 5849; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], 1 5850; GFX8-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 5851; GFX8-NEXT: ; return to shader part epilog 5852; 5853; GFX9-LABEL: s_fshr_i128_65: 5854; GFX9: ; %bb.0: 5855; GFX9-NEXT: s_mov_b32 s4, 0 5856; GFX9-NEXT: s_lshl_b32 s5, s0, 31 5857; GFX9-NEXT: s_lshl_b32 s3, s2, 31 5858; GFX9-NEXT: s_mov_b32 s2, s4 5859; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 5860; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1] 5861; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], 1 5862; GFX9-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 5863; GFX9-NEXT: ; return to shader part epilog 5864; 5865; GFX10-LABEL: s_fshr_i128_65: 5866; GFX10: ; %bb.0: 5867; GFX10-NEXT: s_mov_b32 s4, 0 5868; GFX10-NEXT: s_lshl_b32 s5, s0, 31 5869; GFX10-NEXT: s_lshl_b32 s3, s2, 31 5870; GFX10-NEXT: s_mov_b32 s2, s4 5871; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1 5872; GFX10-NEXT: s_lshr_b64 s[8:9], s[0:1], 1 5873; GFX10-NEXT: s_or_b64 s[0:1], s[4:5], s[6:7] 5874; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 5875; GFX10-NEXT: ; return 
to shader part epilog 5876 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65) 5877 ret i128 %result 5878} 5879 5880define i128 @v_fshr_i128_65(i128 %lhs, i128 %rhs) { 5881; GFX6-LABEL: v_fshr_i128_65: 5882; GFX6: ; %bb.0: 5883; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5884; GFX6-NEXT: v_lshlrev_b32_e32 v4, 31, v0 5885; GFX6-NEXT: v_lshlrev_b32_e32 v5, 31, v2 5886; GFX6-NEXT: v_lshr_b64 v[2:3], v[0:1], 1 5887; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], 1 5888; GFX6-NEXT: v_or_b32_e32 v3, v5, v3 5889; GFX6-NEXT: v_or_b32_e32 v1, v4, v1 5890; GFX6-NEXT: s_setpc_b64 s[30:31] 5891; 5892; GFX8-LABEL: v_fshr_i128_65: 5893; GFX8: ; %bb.0: 5894; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5895; GFX8-NEXT: v_lshlrev_b32_e32 v4, 31, v0 5896; GFX8-NEXT: v_lshlrev_b32_e32 v5, 31, v2 5897; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 5898; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[6:7] 5899; GFX8-NEXT: v_or_b32_e32 v3, v5, v3 5900; GFX8-NEXT: v_or_b32_e32 v1, v4, v1 5901; GFX8-NEXT: s_setpc_b64 s[30:31] 5902; 5903; GFX9-LABEL: v_fshr_i128_65: 5904; GFX9: ; %bb.0: 5905; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5906; GFX9-NEXT: v_lshlrev_b32_e32 v4, 31, v0 5907; GFX9-NEXT: v_lshlrev_b32_e32 v5, 31, v2 5908; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 5909; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[6:7] 5910; GFX9-NEXT: v_or_b32_e32 v3, v5, v3 5911; GFX9-NEXT: v_or_b32_e32 v1, v4, v1 5912; GFX9-NEXT: s_setpc_b64 s[30:31] 5913; 5914; GFX10-LABEL: v_fshr_i128_65: 5915; GFX10: ; %bb.0: 5916; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5917; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5918; GFX10-NEXT: v_mov_b32_e32 v8, v2 5919; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7] 5920; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 5921; GFX10-NEXT: v_lshlrev_b32_e32 v9, 31, v0 5922; GFX10-NEXT: v_lshlrev_b32_e32 v0, 31, v8 5923; GFX10-NEXT: v_or_b32_e32 v1, v9, v5 5924; GFX10-NEXT: v_or_b32_e32 v3, v0, v3 5925; GFX10-NEXT: v_mov_b32_e32 v0, v4 5926; GFX10-NEXT: 
s_setpc_b64 s[30:31] 5927 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65) 5928 ret i128 %result 5929} 5930 5931define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) { 5932; GFX6-LABEL: s_fshr_v2i128: 5933; GFX6: ; %bb.0: 5934; GFX6-NEXT: s_movk_i32 s18, 0x7f 5935; GFX6-NEXT: s_mov_b32 s19, 0 5936; GFX6-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] 5937; GFX6-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] 5938; GFX6-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 5939; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5940; GFX6-NEXT: s_lshr_b32 s0, s1, 31 5941; GFX6-NEXT: s_mov_b32 s1, s19 5942; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 5943; GFX6-NEXT: s_sub_i32 s23, s16, 64 5944; GFX6-NEXT: s_sub_i32 s17, 64, s16 5945; GFX6-NEXT: s_cmp_lt_u32 s16, 64 5946; GFX6-NEXT: s_cselect_b32 s28, 1, 0 5947; GFX6-NEXT: s_cmp_eq_u32 s16, 0 5948; GFX6-NEXT: s_cselect_b32 s29, 1, 0 5949; GFX6-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 5950; GFX6-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 5951; GFX6-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 5952; GFX6-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] 5953; GFX6-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 5954; GFX6-NEXT: s_cmp_lg_u32 s28, 0 5955; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5956; GFX6-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] 5957; GFX6-NEXT: s_cmp_lg_u32 s29, 0 5958; GFX6-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] 5959; GFX6-NEXT: s_sub_i32 s26, s22, 64 5960; GFX6-NEXT: s_sub_i32 s24, 64, s22 5961; GFX6-NEXT: s_cmp_lt_u32 s22, 64 5962; GFX6-NEXT: s_cselect_b32 s27, 1, 0 5963; GFX6-NEXT: s_cmp_eq_u32 s22, 0 5964; GFX6-NEXT: s_cselect_b32 s28, 1, 0 5965; GFX6-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 5966; GFX6-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 5967; GFX6-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 5968; GFX6-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] 5969; GFX6-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 5970; GFX6-NEXT: s_cmp_lg_u32 s27, 0 5971; GFX6-NEXT: 
s_cselect_b64 s[10:11], s[22:23], s[10:11] 5972; GFX6-NEXT: s_cmp_lg_u32 s28, 0 5973; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 5974; GFX6-NEXT: s_cmp_lg_u32 s27, 0 5975; GFX6-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 5976; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 5977; GFX6-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] 5978; GFX6-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] 5979; GFX6-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] 5980; GFX6-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 5981; GFX6-NEXT: s_lshr_b32 s18, s5, 31 5982; GFX6-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 5983; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[18:19] 5984; GFX6-NEXT: s_sub_i32 s9, s10, 64 5985; GFX6-NEXT: s_sub_i32 s11, 64, s10 5986; GFX6-NEXT: s_cmp_lt_u32 s10, 64 5987; GFX6-NEXT: s_cselect_b32 s20, 1, 0 5988; GFX6-NEXT: s_cmp_eq_u32 s10, 0 5989; GFX6-NEXT: s_cselect_b32 s21, 1, 0 5990; GFX6-NEXT: s_lshl_b64 s[6:7], s[16:17], s10 5991; GFX6-NEXT: s_lshr_b64 s[18:19], s[16:17], s11 5992; GFX6-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 5993; GFX6-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11] 5994; GFX6-NEXT: s_lshl_b64 s[16:17], s[16:17], s9 5995; GFX6-NEXT: s_cmp_lg_u32 s20, 0 5996; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 5997; GFX6-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17] 5998; GFX6-NEXT: s_cmp_lg_u32 s21, 0 5999; GFX6-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11] 6000; GFX6-NEXT: s_sub_i32 s18, s8, 64 6001; GFX6-NEXT: s_sub_i32 s16, 64, s8 6002; GFX6-NEXT: s_cmp_lt_u32 s8, 64 6003; GFX6-NEXT: s_cselect_b32 s19, 1, 0 6004; GFX6-NEXT: s_cmp_eq_u32 s8, 0 6005; GFX6-NEXT: s_cselect_b32 s20, 1, 0 6006; GFX6-NEXT: s_lshr_b64 s[4:5], s[14:15], s8 6007; GFX6-NEXT: s_lshr_b64 s[8:9], s[12:13], s8 6008; GFX6-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 6009; GFX6-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17] 6010; GFX6-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 6011; GFX6-NEXT: s_cmp_lg_u32 s19, 0 6012; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15] 6013; GFX6-NEXT: s_cmp_lg_u32 s20, 0 6014; GFX6-NEXT: 
s_cselect_b64 s[8:9], s[12:13], s[8:9] 6015; GFX6-NEXT: s_cmp_lg_u32 s19, 0 6016; GFX6-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 6017; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9] 6018; GFX6-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13] 6019; GFX6-NEXT: ; return to shader part epilog 6020; 6021; GFX8-LABEL: s_fshr_v2i128: 6022; GFX8: ; %bb.0: 6023; GFX8-NEXT: s_movk_i32 s18, 0x7f 6024; GFX8-NEXT: s_mov_b32 s19, 0 6025; GFX8-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] 6026; GFX8-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] 6027; GFX8-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 6028; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6029; GFX8-NEXT: s_lshr_b32 s0, s1, 31 6030; GFX8-NEXT: s_mov_b32 s1, s19 6031; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 6032; GFX8-NEXT: s_sub_i32 s23, s16, 64 6033; GFX8-NEXT: s_sub_i32 s17, 64, s16 6034; GFX8-NEXT: s_cmp_lt_u32 s16, 64 6035; GFX8-NEXT: s_cselect_b32 s28, 1, 0 6036; GFX8-NEXT: s_cmp_eq_u32 s16, 0 6037; GFX8-NEXT: s_cselect_b32 s29, 1, 0 6038; GFX8-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 6039; GFX8-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 6040; GFX8-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 6041; GFX8-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] 6042; GFX8-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 6043; GFX8-NEXT: s_cmp_lg_u32 s28, 0 6044; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 6045; GFX8-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] 6046; GFX8-NEXT: s_cmp_lg_u32 s29, 0 6047; GFX8-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] 6048; GFX8-NEXT: s_sub_i32 s26, s22, 64 6049; GFX8-NEXT: s_sub_i32 s24, 64, s22 6050; GFX8-NEXT: s_cmp_lt_u32 s22, 64 6051; GFX8-NEXT: s_cselect_b32 s27, 1, 0 6052; GFX8-NEXT: s_cmp_eq_u32 s22, 0 6053; GFX8-NEXT: s_cselect_b32 s28, 1, 0 6054; GFX8-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 6055; GFX8-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 6056; GFX8-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 6057; GFX8-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] 6058; GFX8-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 6059; GFX8-NEXT: 
s_cmp_lg_u32 s27, 0 6060; GFX8-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] 6061; GFX8-NEXT: s_cmp_lg_u32 s28, 0 6062; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 6063; GFX8-NEXT: s_cmp_lg_u32 s27, 0 6064; GFX8-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 6065; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 6066; GFX8-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] 6067; GFX8-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] 6068; GFX8-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] 6069; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 6070; GFX8-NEXT: s_lshr_b32 s18, s5, 31 6071; GFX8-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 6072; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[18:19] 6073; GFX8-NEXT: s_sub_i32 s9, s10, 64 6074; GFX8-NEXT: s_sub_i32 s11, 64, s10 6075; GFX8-NEXT: s_cmp_lt_u32 s10, 64 6076; GFX8-NEXT: s_cselect_b32 s20, 1, 0 6077; GFX8-NEXT: s_cmp_eq_u32 s10, 0 6078; GFX8-NEXT: s_cselect_b32 s21, 1, 0 6079; GFX8-NEXT: s_lshl_b64 s[6:7], s[16:17], s10 6080; GFX8-NEXT: s_lshr_b64 s[18:19], s[16:17], s11 6081; GFX8-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 6082; GFX8-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11] 6083; GFX8-NEXT: s_lshl_b64 s[16:17], s[16:17], s9 6084; GFX8-NEXT: s_cmp_lg_u32 s20, 0 6085; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 6086; GFX8-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17] 6087; GFX8-NEXT: s_cmp_lg_u32 s21, 0 6088; GFX8-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11] 6089; GFX8-NEXT: s_sub_i32 s18, s8, 64 6090; GFX8-NEXT: s_sub_i32 s16, 64, s8 6091; GFX8-NEXT: s_cmp_lt_u32 s8, 64 6092; GFX8-NEXT: s_cselect_b32 s19, 1, 0 6093; GFX8-NEXT: s_cmp_eq_u32 s8, 0 6094; GFX8-NEXT: s_cselect_b32 s20, 1, 0 6095; GFX8-NEXT: s_lshr_b64 s[4:5], s[14:15], s8 6096; GFX8-NEXT: s_lshr_b64 s[8:9], s[12:13], s8 6097; GFX8-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 6098; GFX8-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17] 6099; GFX8-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 6100; GFX8-NEXT: s_cmp_lg_u32 s19, 0 6101; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15] 6102; GFX8-NEXT: 
s_cmp_lg_u32 s20, 0 6103; GFX8-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9] 6104; GFX8-NEXT: s_cmp_lg_u32 s19, 0 6105; GFX8-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 6106; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9] 6107; GFX8-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13] 6108; GFX8-NEXT: ; return to shader part epilog 6109; 6110; GFX9-LABEL: s_fshr_v2i128: 6111; GFX9: ; %bb.0: 6112; GFX9-NEXT: s_movk_i32 s18, 0x7f 6113; GFX9-NEXT: s_mov_b32 s19, 0 6114; GFX9-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] 6115; GFX9-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] 6116; GFX9-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 6117; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6118; GFX9-NEXT: s_lshr_b32 s0, s1, 31 6119; GFX9-NEXT: s_mov_b32 s1, s19 6120; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 6121; GFX9-NEXT: s_sub_i32 s23, s16, 64 6122; GFX9-NEXT: s_sub_i32 s17, 64, s16 6123; GFX9-NEXT: s_cmp_lt_u32 s16, 64 6124; GFX9-NEXT: s_cselect_b32 s28, 1, 0 6125; GFX9-NEXT: s_cmp_eq_u32 s16, 0 6126; GFX9-NEXT: s_cselect_b32 s29, 1, 0 6127; GFX9-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 6128; GFX9-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 6129; GFX9-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 6130; GFX9-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] 6131; GFX9-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 6132; GFX9-NEXT: s_cmp_lg_u32 s28, 0 6133; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 6134; GFX9-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] 6135; GFX9-NEXT: s_cmp_lg_u32 s29, 0 6136; GFX9-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] 6137; GFX9-NEXT: s_sub_i32 s26, s22, 64 6138; GFX9-NEXT: s_sub_i32 s24, 64, s22 6139; GFX9-NEXT: s_cmp_lt_u32 s22, 64 6140; GFX9-NEXT: s_cselect_b32 s27, 1, 0 6141; GFX9-NEXT: s_cmp_eq_u32 s22, 0 6142; GFX9-NEXT: s_cselect_b32 s28, 1, 0 6143; GFX9-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 6144; GFX9-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 6145; GFX9-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 6146; GFX9-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] 6147; GFX9-NEXT: s_lshr_b64 s[10:11], 
s[10:11], s26 6148; GFX9-NEXT: s_cmp_lg_u32 s27, 0 6149; GFX9-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] 6150; GFX9-NEXT: s_cmp_lg_u32 s28, 0 6151; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 6152; GFX9-NEXT: s_cmp_lg_u32 s27, 0 6153; GFX9-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 6154; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 6155; GFX9-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] 6156; GFX9-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] 6157; GFX9-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] 6158; GFX9-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 6159; GFX9-NEXT: s_lshr_b32 s18, s5, 31 6160; GFX9-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 6161; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[18:19] 6162; GFX9-NEXT: s_sub_i32 s9, s10, 64 6163; GFX9-NEXT: s_sub_i32 s11, 64, s10 6164; GFX9-NEXT: s_cmp_lt_u32 s10, 64 6165; GFX9-NEXT: s_cselect_b32 s20, 1, 0 6166; GFX9-NEXT: s_cmp_eq_u32 s10, 0 6167; GFX9-NEXT: s_cselect_b32 s21, 1, 0 6168; GFX9-NEXT: s_lshl_b64 s[6:7], s[16:17], s10 6169; GFX9-NEXT: s_lshr_b64 s[18:19], s[16:17], s11 6170; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 6171; GFX9-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11] 6172; GFX9-NEXT: s_lshl_b64 s[16:17], s[16:17], s9 6173; GFX9-NEXT: s_cmp_lg_u32 s20, 0 6174; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 6175; GFX9-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17] 6176; GFX9-NEXT: s_cmp_lg_u32 s21, 0 6177; GFX9-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11] 6178; GFX9-NEXT: s_sub_i32 s18, s8, 64 6179; GFX9-NEXT: s_sub_i32 s16, 64, s8 6180; GFX9-NEXT: s_cmp_lt_u32 s8, 64 6181; GFX9-NEXT: s_cselect_b32 s19, 1, 0 6182; GFX9-NEXT: s_cmp_eq_u32 s8, 0 6183; GFX9-NEXT: s_cselect_b32 s20, 1, 0 6184; GFX9-NEXT: s_lshr_b64 s[4:5], s[14:15], s8 6185; GFX9-NEXT: s_lshr_b64 s[8:9], s[12:13], s8 6186; GFX9-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 6187; GFX9-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17] 6188; GFX9-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 6189; GFX9-NEXT: s_cmp_lg_u32 s19, 0 6190; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], 
s[14:15] 6191; GFX9-NEXT: s_cmp_lg_u32 s20, 0 6192; GFX9-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9] 6193; GFX9-NEXT: s_cmp_lg_u32 s19, 0 6194; GFX9-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 6195; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9] 6196; GFX9-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13] 6197; GFX9-NEXT: ; return to shader part epilog 6198; 6199; GFX10-LABEL: s_fshr_v2i128: 6200; GFX10: ; %bb.0: 6201; GFX10-NEXT: s_movk_i32 s18, 0x7f 6202; GFX10-NEXT: s_mov_b32 s19, 0 6203; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6204; GFX10-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] 6205; GFX10-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] 6206; GFX10-NEXT: s_lshr_b32 s24, s1, 31 6207; GFX10-NEXT: s_mov_b32 s25, s19 6208; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 6209; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[24:25] 6210; GFX10-NEXT: s_sub_i32 s23, s16, 64 6211; GFX10-NEXT: s_sub_i32 s17, 64, s16 6212; GFX10-NEXT: s_cmp_lt_u32 s16, 64 6213; GFX10-NEXT: s_cselect_b32 s28, 1, 0 6214; GFX10-NEXT: s_cmp_eq_u32 s16, 0 6215; GFX10-NEXT: s_cselect_b32 s29, 1, 0 6216; GFX10-NEXT: s_lshr_b64 s[24:25], s[0:1], s17 6217; GFX10-NEXT: s_lshl_b64 s[26:27], s[2:3], s16 6218; GFX10-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 6219; GFX10-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] 6220; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s23 6221; GFX10-NEXT: s_cmp_lg_u32 s28, 0 6222; GFX10-NEXT: s_cselect_b64 s[16:17], s[16:17], 0 6223; GFX10-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1] 6224; GFX10-NEXT: s_cmp_lg_u32 s29, 0 6225; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6226; GFX10-NEXT: s_sub_i32 s26, s22, 64 6227; GFX10-NEXT: s_sub_i32 s23, 64, s22 6228; GFX10-NEXT: s_cmp_lt_u32 s22, 64 6229; GFX10-NEXT: s_cselect_b32 s27, 1, 0 6230; GFX10-NEXT: s_cmp_eq_u32 s22, 0 6231; GFX10-NEXT: s_cselect_b32 s28, 1, 0 6232; GFX10-NEXT: s_lshr_b64 s[0:1], s[8:9], s22 6233; GFX10-NEXT: s_lshl_b64 s[24:25], s[10:11], s23 6234; GFX10-NEXT: s_lshr_b64 s[22:23], s[10:11], s22 6235; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], 
s[24:25] 6236; GFX10-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 6237; GFX10-NEXT: s_cmp_lg_u32 s27, 0 6238; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[10:11] 6239; GFX10-NEXT: s_cmp_lg_u32 s28, 0 6240; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] 6241; GFX10-NEXT: s_cmp_lg_u32 s27, 0 6242; GFX10-NEXT: s_cselect_b64 s[8:9], s[22:23], 0 6243; GFX10-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] 6244; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 6245; GFX10-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] 6246; GFX10-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 6247; GFX10-NEXT: s_lshr_b32 s18, s5, 31 6248; GFX10-NEXT: s_or_b64 s[0:1], s[16:17], s[0:1] 6249; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], 1 6250; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[18:19] 6251; GFX10-NEXT: s_sub_i32 s9, s10, 64 6252; GFX10-NEXT: s_sub_i32 s11, 64, s10 6253; GFX10-NEXT: s_cmp_lt_u32 s10, 64 6254; GFX10-NEXT: s_cselect_b32 s20, 1, 0 6255; GFX10-NEXT: s_cmp_eq_u32 s10, 0 6256; GFX10-NEXT: s_cselect_b32 s21, 1, 0 6257; GFX10-NEXT: s_lshr_b64 s[16:17], s[4:5], s11 6258; GFX10-NEXT: s_lshl_b64 s[18:19], s[6:7], s10 6259; GFX10-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 6260; GFX10-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] 6261; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], s9 6262; GFX10-NEXT: s_cmp_lg_u32 s20, 0 6263; GFX10-NEXT: s_cselect_b64 s[10:11], s[10:11], 0 6264; GFX10-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5] 6265; GFX10-NEXT: s_cmp_lg_u32 s21, 0 6266; GFX10-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] 6267; GFX10-NEXT: s_sub_i32 s18, s8, 64 6268; GFX10-NEXT: s_sub_i32 s9, 64, s8 6269; GFX10-NEXT: s_cmp_lt_u32 s8, 64 6270; GFX10-NEXT: s_cselect_b32 s19, 1, 0 6271; GFX10-NEXT: s_cmp_eq_u32 s8, 0 6272; GFX10-NEXT: s_cselect_b32 s20, 1, 0 6273; GFX10-NEXT: s_lshr_b64 s[4:5], s[12:13], s8 6274; GFX10-NEXT: s_lshl_b64 s[16:17], s[14:15], s9 6275; GFX10-NEXT: s_lshr_b64 s[8:9], s[14:15], s8 6276; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[16:17] 6277; GFX10-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 6278; GFX10-NEXT: 
s_cmp_lg_u32 s19, 0 6279; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[14:15] 6280; GFX10-NEXT: s_cmp_lg_u32 s20, 0 6281; GFX10-NEXT: s_cselect_b64 s[4:5], s[12:13], s[4:5] 6282; GFX10-NEXT: s_cmp_lg_u32 s19, 0 6283; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 6284; GFX10-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] 6285; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 6286; GFX10-NEXT: ; return to shader part epilog 6287 %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) 6288 ret <2 x i128> %result 6289} 6290 6291define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) { 6292; GFX6-LABEL: v_fshr_v2i128: 6293; GFX6: ; %bb.0: 6294; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6295; GFX6-NEXT: s_movk_i32 s6, 0x7f 6296; GFX6-NEXT: v_xor_b32_e32 v17, -1, v16 6297; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 6298; GFX6-NEXT: v_and_b32_e32 v23, s6, v17 6299; GFX6-NEXT: v_lshrrev_b32_e32 v17, 31, v1 6300; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 6301; GFX6-NEXT: v_or_b32_e32 v2, v2, v17 6302; GFX6-NEXT: v_sub_i32_e32 v17, vcc, 64, v23 6303; GFX6-NEXT: v_lshr_b64 v[17:18], v[0:1], v17 6304; GFX6-NEXT: v_lshl_b64 v[21:22], v[2:3], v23 6305; GFX6-NEXT: v_and_b32_e32 v24, s6, v16 6306; GFX6-NEXT: v_sub_i32_e32 v16, vcc, 64, v24 6307; GFX6-NEXT: v_or_b32_e32 v21, v17, v21 6308; GFX6-NEXT: v_or_b32_e32 v22, v18, v22 6309; GFX6-NEXT: v_lshl_b64 v[16:17], v[10:11], v16 6310; GFX6-NEXT: v_lshr_b64 v[18:19], v[8:9], v24 6311; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23 6312; GFX6-NEXT: v_or_b32_e32 v18, v18, v16 6313; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v23 6314; GFX6-NEXT: v_or_b32_e32 v19, v19, v17 6315; GFX6-NEXT: v_lshl_b64 v[16:17], v[0:1], v16 6316; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v23 6317; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 6318; GFX6-NEXT: v_cndmask_b32_e32 v25, 0, v0, vcc 6319; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc 6320; GFX6-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc 6321; GFX6-NEXT: 
v_cndmask_b32_e64 v17, v0, v2, s[4:5] 6322; GFX6-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5] 6323; GFX6-NEXT: v_subrev_i32_e64 v0, s[4:5], 64, v24 6324; GFX6-NEXT: v_lshr_b64 v[2:3], v[10:11], v0 6325; GFX6-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24 6326; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5] 6327; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc 6328; GFX6-NEXT: v_lshr_b64 v[0:1], v[10:11], v24 6329; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 6330; GFX6-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5] 6331; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 6332; GFX6-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5] 6333; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 6334; GFX6-NEXT: v_cndmask_b32_e64 v9, 0, v1, s[4:5] 6335; GFX6-NEXT: v_or_b32_e32 v0, v25, v2 6336; GFX6-NEXT: v_or_b32_e32 v2, v17, v8 6337; GFX6-NEXT: v_xor_b32_e32 v8, -1, v20 6338; GFX6-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 6339; GFX6-NEXT: v_or_b32_e32 v1, v18, v3 6340; GFX6-NEXT: v_or_b32_e32 v3, v16, v9 6341; GFX6-NEXT: v_and_b32_e32 v17, s6, v8 6342; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], 1 6343; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v5 6344; GFX6-NEXT: v_or_b32_e32 v6, v6, v4 6345; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 64, v17 6346; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], v4 6347; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v17 6348; GFX6-NEXT: v_subrev_i32_e32 v18, vcc, 64, v17 6349; GFX6-NEXT: v_or_b32_e32 v10, v4, v10 6350; GFX6-NEXT: v_or_b32_e32 v11, v5, v11 6351; GFX6-NEXT: v_lshl_b64 v[4:5], v[8:9], v17 6352; GFX6-NEXT: v_lshl_b64 v[8:9], v[8:9], v18 6353; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 6354; GFX6-NEXT: v_and_b32_e32 v16, s6, v20 6355; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v4, vcc 6356; GFX6-NEXT: v_cndmask_b32_e32 v19, 0, v5, vcc 6357; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v10, vcc 6358; GFX6-NEXT: v_cndmask_b32_e32 v5, v9, v11, vcc 6359; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 6360; GFX6-NEXT: v_cndmask_b32_e32 v8, v4, v6, vcc 6361; GFX6-NEXT: v_cndmask_b32_e32 v9, v5, v7, vcc 6362; GFX6-NEXT: v_sub_i32_e32 
v6, vcc, 64, v16 6363; GFX6-NEXT: v_lshr_b64 v[4:5], v[12:13], v16 6364; GFX6-NEXT: v_lshl_b64 v[6:7], v[14:15], v6 6365; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, 64, v16 6366; GFX6-NEXT: v_or_b32_e32 v11, v4, v6 6367; GFX6-NEXT: v_or_b32_e32 v17, v5, v7 6368; GFX6-NEXT: v_lshr_b64 v[6:7], v[14:15], v10 6369; GFX6-NEXT: v_lshr_b64 v[4:5], v[14:15], v16 6370; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 6371; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc 6372; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc 6373; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 6374; GFX6-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] 6375; GFX6-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] 6376; GFX6-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc 6377; GFX6-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc 6378; GFX6-NEXT: v_or_b32_e32 v4, v18, v6 6379; GFX6-NEXT: v_or_b32_e32 v5, v19, v7 6380; GFX6-NEXT: v_or_b32_e32 v6, v8, v10 6381; GFX6-NEXT: v_or_b32_e32 v7, v9, v11 6382; GFX6-NEXT: s_setpc_b64 s[30:31] 6383; 6384; GFX8-LABEL: v_fshr_v2i128: 6385; GFX8: ; %bb.0: 6386; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6387; GFX8-NEXT: s_movk_i32 s6, 0x7f 6388; GFX8-NEXT: v_xor_b32_e32 v17, -1, v16 6389; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6390; GFX8-NEXT: v_and_b32_e32 v23, s6, v17 6391; GFX8-NEXT: v_lshrrev_b32_e32 v17, 31, v1 6392; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 6393; GFX8-NEXT: v_or_b32_e32 v2, v2, v17 6394; GFX8-NEXT: v_sub_u32_e32 v17, vcc, 64, v23 6395; GFX8-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1] 6396; GFX8-NEXT: v_lshlrev_b64 v[21:22], v23, v[2:3] 6397; GFX8-NEXT: v_and_b32_e32 v24, s6, v16 6398; GFX8-NEXT: v_sub_u32_e32 v16, vcc, 64, v24 6399; GFX8-NEXT: v_or_b32_e32 v21, v17, v21 6400; GFX8-NEXT: v_or_b32_e32 v22, v18, v22 6401; GFX8-NEXT: v_lshlrev_b64 v[16:17], v16, v[10:11] 6402; GFX8-NEXT: v_lshrrev_b64 v[18:19], v24, v[8:9] 6403; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23 6404; GFX8-NEXT: v_or_b32_e32 v18, v18, v16 6405; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v23 6406; 
GFX8-NEXT: v_or_b32_e32 v19, v19, v17 6407; GFX8-NEXT: v_lshlrev_b64 v[16:17], v16, v[0:1] 6408; GFX8-NEXT: v_lshlrev_b64 v[0:1], v23, v[0:1] 6409; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 6410; GFX8-NEXT: v_cndmask_b32_e32 v25, 0, v0, vcc 6411; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc 6412; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc 6413; GFX8-NEXT: v_cndmask_b32_e64 v17, v0, v2, s[4:5] 6414; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5] 6415; GFX8-NEXT: v_subrev_u32_e64 v0, s[4:5], 64, v24 6416; GFX8-NEXT: v_lshrrev_b64 v[2:3], v0, v[10:11] 6417; GFX8-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24 6418; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5] 6419; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc 6420; GFX8-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11] 6421; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 6422; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5] 6423; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 6424; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5] 6425; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 6426; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, v1, s[4:5] 6427; GFX8-NEXT: v_or_b32_e32 v0, v25, v2 6428; GFX8-NEXT: v_or_b32_e32 v2, v17, v8 6429; GFX8-NEXT: v_xor_b32_e32 v8, -1, v20 6430; GFX8-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 6431; GFX8-NEXT: v_or_b32_e32 v1, v18, v3 6432; GFX8-NEXT: v_or_b32_e32 v3, v16, v9 6433; GFX8-NEXT: v_and_b32_e32 v17, s6, v8 6434; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5] 6435; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v5 6436; GFX8-NEXT: v_or_b32_e32 v6, v6, v4 6437; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 64, v17 6438; GFX8-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9] 6439; GFX8-NEXT: v_lshlrev_b64 v[10:11], v17, v[6:7] 6440; GFX8-NEXT: v_subrev_u32_e32 v18, vcc, 64, v17 6441; GFX8-NEXT: v_or_b32_e32 v10, v4, v10 6442; GFX8-NEXT: v_or_b32_e32 v11, v5, v11 6443; GFX8-NEXT: v_lshlrev_b64 v[4:5], v17, v[8:9] 6444; GFX8-NEXT: v_lshlrev_b64 v[8:9], v18, v[8:9] 6445; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 6446; GFX8-NEXT: v_and_b32_e32 v16, s6, 
v20 6447; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v4, vcc 6448; GFX8-NEXT: v_cndmask_b32_e32 v19, 0, v5, vcc 6449; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v10, vcc 6450; GFX8-NEXT: v_cndmask_b32_e32 v5, v9, v11, vcc 6451; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 6452; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v6, vcc 6453; GFX8-NEXT: v_cndmask_b32_e32 v9, v5, v7, vcc 6454; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v16 6455; GFX8-NEXT: v_lshrrev_b64 v[4:5], v16, v[12:13] 6456; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15] 6457; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, 64, v16 6458; GFX8-NEXT: v_or_b32_e32 v11, v4, v6 6459; GFX8-NEXT: v_or_b32_e32 v17, v5, v7 6460; GFX8-NEXT: v_lshrrev_b64 v[6:7], v10, v[14:15] 6461; GFX8-NEXT: v_lshrrev_b64 v[4:5], v16, v[14:15] 6462; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 6463; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc 6464; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc 6465; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 6466; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] 6467; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] 6468; GFX8-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc 6469; GFX8-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc 6470; GFX8-NEXT: v_or_b32_e32 v4, v18, v6 6471; GFX8-NEXT: v_or_b32_e32 v5, v19, v7 6472; GFX8-NEXT: v_or_b32_e32 v6, v8, v10 6473; GFX8-NEXT: v_or_b32_e32 v7, v9, v11 6474; GFX8-NEXT: s_setpc_b64 s[30:31] 6475; 6476; GFX9-LABEL: v_fshr_v2i128: 6477; GFX9: ; %bb.0: 6478; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6479; GFX9-NEXT: s_movk_i32 s6, 0x7f 6480; GFX9-NEXT: v_xor_b32_e32 v17, -1, v16 6481; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6482; GFX9-NEXT: v_and_b32_e32 v23, s6, v17 6483; GFX9-NEXT: v_lshrrev_b32_e32 v17, 31, v1 6484; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 6485; GFX9-NEXT: v_or_b32_e32 v2, v2, v17 6486; GFX9-NEXT: v_sub_u32_e32 v17, 64, v23 6487; GFX9-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1] 6488; GFX9-NEXT: v_lshlrev_b64 v[21:22], v23, v[2:3] 6489; GFX9-NEXT: v_and_b32_e32 v24, s6, v16 6490; 
GFX9-NEXT: v_sub_u32_e32 v16, 64, v24 6491; GFX9-NEXT: v_or_b32_e32 v21, v17, v21 6492; GFX9-NEXT: v_or_b32_e32 v22, v18, v22 6493; GFX9-NEXT: v_lshlrev_b64 v[16:17], v16, v[10:11] 6494; GFX9-NEXT: v_lshrrev_b64 v[18:19], v24, v[8:9] 6495; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 6496; GFX9-NEXT: v_or_b32_e32 v18, v18, v16 6497; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v23 6498; GFX9-NEXT: v_or_b32_e32 v19, v19, v17 6499; GFX9-NEXT: v_lshlrev_b64 v[16:17], v16, v[0:1] 6500; GFX9-NEXT: v_lshlrev_b64 v[0:1], v23, v[0:1] 6501; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23 6502; GFX9-NEXT: v_cndmask_b32_e32 v25, 0, v0, vcc 6503; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc 6504; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc 6505; GFX9-NEXT: v_cndmask_b32_e64 v17, v0, v2, s[4:5] 6506; GFX9-NEXT: v_subrev_u32_e32 v0, 64, v24 6507; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5] 6508; GFX9-NEXT: v_lshrrev_b64 v[2:3], v0, v[10:11] 6509; GFX9-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24 6510; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5] 6511; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc 6512; GFX9-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11] 6513; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 6514; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5] 6515; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 6516; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5] 6517; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 6518; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, v1, s[4:5] 6519; GFX9-NEXT: v_or_b32_e32 v0, v25, v2 6520; GFX9-NEXT: v_or_b32_e32 v2, v17, v8 6521; GFX9-NEXT: v_xor_b32_e32 v8, -1, v20 6522; GFX9-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 6523; GFX9-NEXT: v_or_b32_e32 v1, v18, v3 6524; GFX9-NEXT: v_or_b32_e32 v3, v16, v9 6525; GFX9-NEXT: v_and_b32_e32 v17, s6, v8 6526; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5] 6527; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5 6528; GFX9-NEXT: v_or_b32_e32 v6, v6, v4 6529; GFX9-NEXT: v_sub_u32_e32 v4, 64, v17 6530; GFX9-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9] 6531; 
GFX9-NEXT: v_lshlrev_b64 v[10:11], v17, v[6:7] 6532; GFX9-NEXT: v_subrev_u32_e32 v18, 64, v17 6533; GFX9-NEXT: v_or_b32_e32 v10, v4, v10 6534; GFX9-NEXT: v_or_b32_e32 v11, v5, v11 6535; GFX9-NEXT: v_lshlrev_b64 v[4:5], v17, v[8:9] 6536; GFX9-NEXT: v_lshlrev_b64 v[8:9], v18, v[8:9] 6537; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 6538; GFX9-NEXT: v_and_b32_e32 v16, s6, v20 6539; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v4, vcc 6540; GFX9-NEXT: v_cndmask_b32_e32 v19, 0, v5, vcc 6541; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v10, vcc 6542; GFX9-NEXT: v_cndmask_b32_e32 v5, v9, v11, vcc 6543; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 6544; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v6, vcc 6545; GFX9-NEXT: v_sub_u32_e32 v6, 64, v16 6546; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v7, vcc 6547; GFX9-NEXT: v_lshrrev_b64 v[4:5], v16, v[12:13] 6548; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15] 6549; GFX9-NEXT: v_subrev_u32_e32 v10, 64, v16 6550; GFX9-NEXT: v_or_b32_e32 v11, v4, v6 6551; GFX9-NEXT: v_or_b32_e32 v17, v5, v7 6552; GFX9-NEXT: v_lshrrev_b64 v[6:7], v10, v[14:15] 6553; GFX9-NEXT: v_lshrrev_b64 v[4:5], v16, v[14:15] 6554; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 6555; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc 6556; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc 6557; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 6558; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] 6559; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] 6560; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc 6561; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc 6562; GFX9-NEXT: v_or_b32_e32 v4, v18, v6 6563; GFX9-NEXT: v_or_b32_e32 v5, v19, v7 6564; GFX9-NEXT: v_or_b32_e32 v6, v8, v10 6565; GFX9-NEXT: v_or_b32_e32 v7, v9, v11 6566; GFX9-NEXT: s_setpc_b64 s[30:31] 6567; 6568; GFX10-LABEL: v_fshr_v2i128: 6569; GFX10: ; %bb.0: 6570; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6571; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 6572; GFX10-NEXT: v_xor_b32_e32 v17, -1, v16 6573; GFX10-NEXT: s_movk_i32 s5, 0x7f 6574; GFX10-NEXT: 
v_lshlrev_b64 v[2:3], 1, v[2:3] 6575; GFX10-NEXT: v_and_b32_e32 v26, s5, v16 6576; GFX10-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 6577; GFX10-NEXT: v_and_b32_e32 v25, s5, v17 6578; GFX10-NEXT: v_lshrrev_b32_e32 v17, 31, v1 6579; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 6580; GFX10-NEXT: v_subrev_nc_u32_e32 v27, 64, v26 6581; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v26 6582; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v25 6583; GFX10-NEXT: v_or_b32_e32 v2, v2, v17 6584; GFX10-NEXT: v_subrev_nc_u32_e32 v19, 64, v25 6585; GFX10-NEXT: v_lshlrev_b64 v[23:24], v25, v[0:1] 6586; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25 6587; GFX10-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1] 6588; GFX10-NEXT: v_lshlrev_b64 v[21:22], v25, v[2:3] 6589; GFX10-NEXT: v_lshlrev_b64 v[0:1], v19, v[0:1] 6590; GFX10-NEXT: v_cndmask_b32_e32 v23, 0, v23, vcc_lo 6591; GFX10-NEXT: v_cndmask_b32_e32 v24, 0, v24, vcc_lo 6592; GFX10-NEXT: v_or_b32_e32 v22, v18, v22 6593; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v26 6594; GFX10-NEXT: v_or_b32_e32 v21, v17, v21 6595; GFX10-NEXT: v_lshrrev_b64 v[16:17], v26, v[8:9] 6596; GFX10-NEXT: v_cndmask_b32_e32 v22, v1, v22, vcc_lo 6597; GFX10-NEXT: v_lshlrev_b64 v[18:19], v18, v[10:11] 6598; GFX10-NEXT: v_cndmask_b32_e32 v21, v0, v21, vcc_lo 6599; GFX10-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11] 6600; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v25 6601; GFX10-NEXT: v_or_b32_e32 v16, v16, v18 6602; GFX10-NEXT: v_or_b32_e32 v17, v17, v19 6603; GFX10-NEXT: v_cndmask_b32_e32 v18, v21, v2, vcc_lo 6604; GFX10-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo 6605; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26 6606; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v16, s4 6607; GFX10-NEXT: v_xor_b32_e32 v16, -1, v20 6608; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v17, s4 6609; GFX10-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11] 6610; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 6611; GFX10-NEXT: v_and_b32_e32 v25, s5, v16 6612; GFX10-NEXT: v_lshrrev_b32_e32 v8, 31, v5 6613; GFX10-NEXT: v_lshlrev_b64 v[4:5], 1, 
v[4:5] 6614; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 6615; GFX10-NEXT: v_or_b32_e32 v0, v23, v0 6616; GFX10-NEXT: v_sub_nc_u32_e32 v9, 64, v25 6617; GFX10-NEXT: v_or_b32_e32 v6, v6, v8 6618; GFX10-NEXT: v_and_b32_e32 v23, s5, v20 6619; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s4 6620; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, v3, s4 6621; GFX10-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5] 6622; GFX10-NEXT: v_lshlrev_b64 v[10:11], v25, v[6:7] 6623; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v23 6624; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v25 6625; GFX10-NEXT: v_or_b32_e32 v2, v18, v2 6626; GFX10-NEXT: v_lshlrev_b64 v[16:17], v25, v[4:5] 6627; GFX10-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13] 6628; GFX10-NEXT: v_or_b32_e32 v10, v8, v10 6629; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v23 6630; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] 6631; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25 6632; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5] 6633; GFX10-NEXT: v_or_b32_e32 v5, v9, v11 6634; GFX10-NEXT: v_lshrrev_b64 v[8:9], v8, v[14:15] 6635; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v23 6636; GFX10-NEXT: v_cndmask_b32_e32 v11, 0, v16, vcc_lo 6637; GFX10-NEXT: v_or_b32_e32 v16, v18, v20 6638; GFX10-NEXT: v_or_b32_e32 v18, v19, v21 6639; GFX10-NEXT: v_cndmask_b32_e32 v10, v3, v10, vcc_lo 6640; GFX10-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo 6641; GFX10-NEXT: v_lshrrev_b64 v[3:4], v23, v[14:15] 6642; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v16, s4 6643; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v23 6644; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v25 6645; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v18, s4 6646; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo 6647; GFX10-NEXT: v_or_b32_e32 v1, v24, v1 6648; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v6, s6 6649; GFX10-NEXT: v_cndmask_b32_e64 v7, v5, v7, s6 6650; GFX10-NEXT: v_cndmask_b32_e64 v5, v8, v12, s5 6651; GFX10-NEXT: v_cndmask_b32_e64 v8, v9, v13, s5 6652; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v3, s4 6653; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 
v4, s4 6654; GFX10-NEXT: v_or_b32_e32 v3, v22, v26 6655; GFX10-NEXT: v_or_b32_e32 v4, v11, v5 6656; GFX10-NEXT: v_or_b32_e32 v5, v14, v8 6657; GFX10-NEXT: v_or_b32_e32 v6, v6, v9 6658; GFX10-NEXT: v_or_b32_e32 v7, v7, v10 6659; GFX10-NEXT: s_setpc_b64 s[30:31] 6660 %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) 6661 ret <2 x i128> %result 6662} 6663 6664declare i7 @llvm.fshr.i7(i7, i7, i7) #0 6665declare i8 @llvm.fshr.i8(i8, i8, i8) #0 6666declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0 6667declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0 6668 6669declare i16 @llvm.fshr.i16(i16, i16, i16) #0 6670declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0 6671declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0 6672declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0 6673declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0 6674declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0 6675declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0 6676 6677declare i24 @llvm.fshr.i24(i24, i24, i24) #0 6678declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0 6679 6680declare i32 @llvm.fshr.i32(i32, i32, i32) #0 6681declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0 6682declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0 6683declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0 6684declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0 6685declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0 6686 6687declare i48 @llvm.fshr.i48(i48, i48, i48) #0 6688 6689declare i64 @llvm.fshr.i64(i64, i64, i64) #0 6690declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0 6691 6692declare i128 @llvm.fshr.i128(i128, i128, i128) #0 6693declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x 
i128>, <2 x i128>) #0 6694 6695attributes #0 = { nounwind readnone speculatable willreturn } 6696
; Attribute group #0 (defined above as nounwind, readnone, speculatable, willreturn) is shared by every llvm.fshr.* intrinsic declaration in the list above, each of which references it through its trailing #0.