1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s 3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s 4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s 5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s 6; v_ssubsat_i7 - llvm.ssub.sat.i7 with operands in v0/v1. Expected GFX6 code shifts both operands left by 25, clamps the rhs with max/min between bounds computed from the lhs, subtracts, and shifts back right by 25; GFX8 expects the same shape in 16-bit ops with a shift of 9; GFX9 and GFX10 expect one 16-bit subtract with clamp between the two shifts. 7define i7 @v_ssubsat_i7(i7 %lhs, i7 %rhs) { 8; GFX6-LABEL: v_ssubsat_i7: 9; GFX6: ; %bb.0: 10; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11; GFX6-NEXT: v_lshlrev_b32_e32 v0, 25, v0 12; GFX6-NEXT: v_max_i32_e32 v2, -1, v0 13; GFX6-NEXT: v_lshlrev_b32_e32 v1, 25, v1 14; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2 15; GFX6-NEXT: v_min_i32_e32 v3, -1, v0 16; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3 17; GFX6-NEXT: v_max_i32_e32 v1, v2, v1 18; GFX6-NEXT: v_min_i32_e32 v1, v1, v3 19; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 20; GFX6-NEXT: v_ashrrev_i32_e32 v0, 25, v0 21; GFX6-NEXT: s_setpc_b64 s[30:31] 22; 23; GFX8-LABEL: v_ssubsat_i7: 24; GFX8: ; %bb.0: 25; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26; GFX8-NEXT: v_lshlrev_b16_e32 v0, 9, v0 27; GFX8-NEXT: v_max_i16_e32 v2, -1, v0 28; GFX8-NEXT: v_lshlrev_b16_e32 v1, 9, v1 29; GFX8-NEXT: v_subrev_u16_e32 v2, 0x7fff, v2 30; GFX8-NEXT: v_min_i16_e32 v3, -1, v0 31; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3 32; GFX8-NEXT: v_max_i16_e32 v1, v2, v1 33; GFX8-NEXT: v_min_i16_e32 v1, v1, v3 34; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1 35; GFX8-NEXT: v_ashrrev_i16_e32 v0, 9, v0 36; GFX8-NEXT: s_setpc_b64 s[30:31] 37; 38; GFX9-LABEL: v_ssubsat_i7: 39; GFX9: ; %bb.0: 40; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 41; GFX9-NEXT: v_lshlrev_b16_e32 v0, 9, v0 42; GFX9-NEXT: v_lshlrev_b16_e32 v1, 9, v1 43; GFX9-NEXT: v_sub_i16 v0, v0, v1 clamp 44; GFX9-NEXT: v_ashrrev_i16_e32 v0, 9, v0 45; 
GFX9-NEXT: s_setpc_b64 s[30:31] 46; 47; GFX10-LABEL: v_ssubsat_i7: 48; GFX10: ; %bb.0: 49; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 50; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 51; GFX10-NEXT: v_lshlrev_b16 v0, 9, v0 52; GFX10-NEXT: v_lshlrev_b16 v1, 9, v1 53; GFX10-NEXT: v_sub_nc_i16 v0, v0, v1 clamp 54; GFX10-NEXT: v_ashrrev_i16 v0, 9, v0 55; GFX10-NEXT: s_setpc_b64 s[30:31] 56 %result = call i7 @llvm.ssub.sat.i7(i7 %lhs, i7 %rhs) 57 ret i7 %result 58} 59; s_ssubsat_i7 - llvm.ssub.sat.i7 in an amdgpu_ps function with inreg operands (s0/s1 in the checks). Expected GFX6 and GFX8 code uses only scalar instructions (GFX8 sign-extends from 16 bits around the 32-bit scalar min/max); GFX9 and GFX10 expect a vector 16-bit subtract with clamp followed by v_readfirstlane_b32 into s0. 60define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) { 61; GFX6-LABEL: s_ssubsat_i7: 62; GFX6: ; %bb.0: 63; GFX6-NEXT: s_lshl_b32 s0, s0, 25 64; GFX6-NEXT: s_max_i32 s2, s0, -1 65; GFX6-NEXT: s_lshl_b32 s1, s1, 25 66; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff 67; GFX6-NEXT: s_min_i32 s3, s0, -1 68; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000 69; GFX6-NEXT: s_max_i32 s1, s2, s1 70; GFX6-NEXT: s_min_i32 s1, s1, s3 71; GFX6-NEXT: s_sub_i32 s0, s0, s1 72; GFX6-NEXT: s_ashr_i32 s0, s0, 25 73; GFX6-NEXT: ; return to shader part epilog 74; 75; GFX8-LABEL: s_ssubsat_i7: 76; GFX8: ; %bb.0: 77; GFX8-NEXT: s_bfe_u32 s2, 9, 0x100000 78; GFX8-NEXT: s_lshl_b32 s0, s0, s2 79; GFX8-NEXT: s_sext_i32_i16 s3, s0 80; GFX8-NEXT: s_sext_i32_i16 s4, -1 81; GFX8-NEXT: s_max_i32 s5, s3, s4 82; GFX8-NEXT: s_lshl_b32 s1, s1, s2 83; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fff 84; GFX8-NEXT: s_min_i32 s3, s3, s4 85; GFX8-NEXT: s_sext_i32_i16 s4, s5 86; GFX8-NEXT: s_sext_i32_i16 s1, s1 87; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000 88; GFX8-NEXT: s_max_i32 s1, s4, s1 89; GFX8-NEXT: s_sext_i32_i16 s1, s1 90; GFX8-NEXT: s_sext_i32_i16 s3, s3 91; GFX8-NEXT: s_min_i32 s1, s1, s3 92; GFX8-NEXT: s_sub_i32 s0, s0, s1 93; GFX8-NEXT: s_sext_i32_i16 s0, s0 94; GFX8-NEXT: s_ashr_i32 s0, s0, s2 95; GFX8-NEXT: ; return to shader part epilog 96; 97; GFX9-LABEL: s_ssubsat_i7: 98; GFX9: ; %bb.0: 99; GFX9-NEXT: s_bfe_u32 s2, 9, 0x100000 100; GFX9-NEXT: s_lshl_b32 s1, s1, s2 101; GFX9-NEXT: s_lshl_b32 s0, s0, s2 102; GFX9-NEXT: v_mov_b32_e32 v0, s1 103; 
GFX9-NEXT: v_sub_i16 v0, s0, v0 clamp 104; GFX9-NEXT: v_ashrrev_i16_e32 v0, 9, v0 105; GFX9-NEXT: v_readfirstlane_b32 s0, v0 106; GFX9-NEXT: ; return to shader part epilog 107; 108; GFX10-LABEL: s_ssubsat_i7: 109; GFX10: ; %bb.0: 110; GFX10-NEXT: s_bfe_u32 s2, 9, 0x100000 111; GFX10-NEXT: s_lshl_b32 s0, s0, s2 112; GFX10-NEXT: s_lshl_b32 s1, s1, s2 113; GFX10-NEXT: v_sub_nc_i16 v0, s0, s1 clamp 114; GFX10-NEXT: v_ashrrev_i16 v0, 9, v0 115; GFX10-NEXT: v_readfirstlane_b32 s0, v0 116; GFX10-NEXT: ; return to shader part epilog 117 %result = call i7 @llvm.ssub.sat.i7(i7 %lhs, i7 %rhs) 118 ret i7 %result 119} 120; v_ssubsat_i8 - llvm.ssub.sat.i8 with operands in v0/v1. The expected sequences have the same shape as the i7 case but shift by 24 on GFX6 (32-bit ops) and by 8 on GFX8, GFX9 and GFX10 (16-bit ops); GFX9 and GFX10 expect a single 16-bit subtract with clamp. 121define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) { 122; GFX6-LABEL: v_ssubsat_i8: 123; GFX6: ; %bb.0: 124; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 125; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 126; GFX6-NEXT: v_max_i32_e32 v2, -1, v0 127; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 128; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2 129; GFX6-NEXT: v_min_i32_e32 v3, -1, v0 130; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3 131; GFX6-NEXT: v_max_i32_e32 v1, v2, v1 132; GFX6-NEXT: v_min_i32_e32 v1, v1, v3 133; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 134; GFX6-NEXT: v_ashrrev_i32_e32 v0, 24, v0 135; GFX6-NEXT: s_setpc_b64 s[30:31] 136; 137; GFX8-LABEL: v_ssubsat_i8: 138; GFX8: ; %bb.0: 139; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 140; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0 141; GFX8-NEXT: v_max_i16_e32 v2, -1, v0 142; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 143; GFX8-NEXT: v_subrev_u16_e32 v2, 0x7fff, v2 144; GFX8-NEXT: v_min_i16_e32 v3, -1, v0 145; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3 146; GFX8-NEXT: v_max_i16_e32 v1, v2, v1 147; GFX8-NEXT: v_min_i16_e32 v1, v1, v3 148; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1 149; GFX8-NEXT: v_ashrrev_i16_e32 v0, 8, v0 150; GFX8-NEXT: s_setpc_b64 s[30:31] 151; 152; GFX9-LABEL: v_ssubsat_i8: 153; GFX9: ; %bb.0: 154; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 155; GFX9-NEXT: v_lshlrev_b16_e32 v0, 
8, v0 156; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 157; GFX9-NEXT: v_sub_i16 v0, v0, v1 clamp 158; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v0 159; GFX9-NEXT: s_setpc_b64 s[30:31] 160; 161; GFX10-LABEL: v_ssubsat_i8: 162; GFX10: ; %bb.0: 163; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 164; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 165; GFX10-NEXT: v_lshlrev_b16 v0, 8, v0 166; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 167; GFX10-NEXT: v_sub_nc_i16 v0, v0, v1 clamp 168; GFX10-NEXT: v_ashrrev_i16 v0, 8, v0 169; GFX10-NEXT: s_setpc_b64 s[30:31] 170 %result = call i8 @llvm.ssub.sat.i8(i8 %lhs, i8 %rhs) 171 ret i8 %result 172} 173; s_ssubsat_i8 - llvm.ssub.sat.i8 in an amdgpu_ps function with inreg operands (s0/s1 in the checks). Expected GFX6 and GFX8 code is all scalar, shifting by 24 and by 8 respectively; GFX9 and GFX10 expect a vector 16-bit subtract with clamp and then v_readfirstlane_b32 into s0. 174define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) { 175; GFX6-LABEL: s_ssubsat_i8: 176; GFX6: ; %bb.0: 177; GFX6-NEXT: s_lshl_b32 s0, s0, 24 178; GFX6-NEXT: s_max_i32 s2, s0, -1 179; GFX6-NEXT: s_lshl_b32 s1, s1, 24 180; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff 181; GFX6-NEXT: s_min_i32 s3, s0, -1 182; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000 183; GFX6-NEXT: s_max_i32 s1, s2, s1 184; GFX6-NEXT: s_min_i32 s1, s1, s3 185; GFX6-NEXT: s_sub_i32 s0, s0, s1 186; GFX6-NEXT: s_ashr_i32 s0, s0, 24 187; GFX6-NEXT: ; return to shader part epilog 188; 189; GFX8-LABEL: s_ssubsat_i8: 190; GFX8: ; %bb.0: 191; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000 192; GFX8-NEXT: s_lshl_b32 s0, s0, s2 193; GFX8-NEXT: s_sext_i32_i16 s3, s0 194; GFX8-NEXT: s_sext_i32_i16 s4, -1 195; GFX8-NEXT: s_max_i32 s5, s3, s4 196; GFX8-NEXT: s_lshl_b32 s1, s1, s2 197; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fff 198; GFX8-NEXT: s_min_i32 s3, s3, s4 199; GFX8-NEXT: s_sext_i32_i16 s4, s5 200; GFX8-NEXT: s_sext_i32_i16 s1, s1 201; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000 202; GFX8-NEXT: s_max_i32 s1, s4, s1 203; GFX8-NEXT: s_sext_i32_i16 s1, s1 204; GFX8-NEXT: s_sext_i32_i16 s3, s3 205; GFX8-NEXT: s_min_i32 s1, s1, s3 206; GFX8-NEXT: s_sub_i32 s0, s0, s1 207; GFX8-NEXT: s_sext_i32_i16 s0, s0 208; GFX8-NEXT: s_ashr_i32 s0, s0, s2 209; GFX8-NEXT: ; return to shader part epilog 210; 211; GFX9-LABEL: 
s_ssubsat_i8: 212; GFX9: ; %bb.0: 213; GFX9-NEXT: s_bfe_u32 s2, 8, 0x100000 214; GFX9-NEXT: s_lshl_b32 s1, s1, s2 215; GFX9-NEXT: s_lshl_b32 s0, s0, s2 216; GFX9-NEXT: v_mov_b32_e32 v0, s1 217; GFX9-NEXT: v_sub_i16 v0, s0, v0 clamp 218; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v0 219; GFX9-NEXT: v_readfirstlane_b32 s0, v0 220; GFX9-NEXT: ; return to shader part epilog 221; 222; GFX10-LABEL: s_ssubsat_i8: 223; GFX10: ; %bb.0: 224; GFX10-NEXT: s_bfe_u32 s2, 8, 0x100000 225; GFX10-NEXT: s_lshl_b32 s0, s0, s2 226; GFX10-NEXT: s_lshl_b32 s1, s1, s2 227; GFX10-NEXT: v_sub_nc_i16 v0, s0, s1 clamp 228; GFX10-NEXT: v_ashrrev_i16 v0, 8, v0 229; GFX10-NEXT: v_readfirstlane_b32 s0, v0 230; GFX10-NEXT: ; return to shader part epilog 231 %result = call i8 @llvm.ssub.sat.i8(i8 %lhs, i8 %rhs) 232 ret i8 %result 233} 234; v_ssubsat_v2i8 - the i16 arguments are bitcast to <2 x i8>, passed to llvm.ssub.sat.v2i8, and the result is bitcast back to i16. Expected GFX6 and GFX8 code runs the max/min clamp sequence once per byte and recombines the results with a 0xff mask; GFX9 and GFX10 expect the bytes to be placed in the two 16-bit halves of a register and handled by one v_pk_sub_i16 with clamp. 235define i16 @v_ssubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) { 236; GFX6-LABEL: v_ssubsat_v2i8: 237; GFX6: ; %bb.0: 238; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 239; GFX6-NEXT: v_lshrrev_b32_e32 v2, 8, v0 240; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 241; GFX6-NEXT: s_brev_b32 s4, -2 242; GFX6-NEXT: v_max_i32_e32 v4, -1, v0 243; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v1 244; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 245; GFX6-NEXT: s_brev_b32 s5, 1 246; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 247; GFX6-NEXT: v_min_i32_e32 v5, -1, v0 248; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v5 249; GFX6-NEXT: v_max_i32_e32 v1, v4, v1 250; GFX6-NEXT: v_min_i32_e32 v1, v1, v5 251; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 252; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2 253; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3 254; GFX6-NEXT: v_max_i32_e32 v3, -1, v1 255; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s4, v3 256; GFX6-NEXT: v_min_i32_e32 v4, -1, v1 257; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s5, v4 258; GFX6-NEXT: v_max_i32_e32 v2, v3, v2 259; GFX6-NEXT: v_min_i32_e32 v2, v2, v4 260; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 261; GFX6-NEXT: v_ashrrev_i32_e32 v1, 24, v1 262; GFX6-NEXT: 
v_mov_b32_e32 v2, 0xff 263; GFX6-NEXT: v_ashrrev_i32_e32 v0, 24, v0 264; GFX6-NEXT: v_and_b32_e32 v1, v1, v2 265; GFX6-NEXT: v_and_b32_e32 v0, v0, v2 266; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 267; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 268; GFX6-NEXT: s_setpc_b64 s[30:31] 269; 270; GFX8-LABEL: v_ssubsat_v2i8: 271; GFX8: ; %bb.0: 272; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 273; GFX8-NEXT: v_mov_b32_e32 v2, 8 274; GFX8-NEXT: v_lshrrev_b32_sdwa v3, v2, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 275; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0 276; GFX8-NEXT: s_movk_i32 s4, 0x7fff 277; GFX8-NEXT: v_max_i16_e32 v4, -1, v0 278; GFX8-NEXT: v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 279; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 280; GFX8-NEXT: s_movk_i32 s5, 0x8000 281; GFX8-NEXT: v_subrev_u16_e32 v4, s4, v4 282; GFX8-NEXT: v_min_i16_e32 v5, -1, v0 283; GFX8-NEXT: v_subrev_u16_e32 v5, s5, v5 284; GFX8-NEXT: v_max_i16_e32 v1, v4, v1 285; GFX8-NEXT: v_min_i16_e32 v1, v1, v5 286; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1 287; GFX8-NEXT: v_max_i16_e32 v1, -1, v3 288; GFX8-NEXT: v_subrev_u16_e32 v1, s4, v1 289; GFX8-NEXT: v_min_i16_e32 v4, -1, v3 290; GFX8-NEXT: v_subrev_u16_e32 v4, s5, v4 291; GFX8-NEXT: v_max_i16_e32 v1, v1, v2 292; GFX8-NEXT: v_min_i16_e32 v1, v1, v4 293; GFX8-NEXT: v_sub_u16_e32 v1, v3, v1 294; GFX8-NEXT: v_mov_b32_e32 v2, 0xff 295; GFX8-NEXT: v_and_b32_sdwa v0, sext(v0), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD 296; GFX8-NEXT: v_and_b32_sdwa v1, sext(v1), v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD 297; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 298; GFX8-NEXT: s_setpc_b64 s[30:31] 299; 300; GFX9-LABEL: v_ssubsat_v2i8: 301; GFX9: ; %bb.0: 302; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 303; GFX9-NEXT: s_mov_b32 s4, 8 304; GFX9-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:DWORD 305; GFX9-NEXT: v_lshrrev_b32_sdwa v3, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 306; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff 307; GFX9-NEXT: v_and_or_b32 v0, v0, v4, v2 308; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v3 309; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] 310; GFX9-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] 311; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 clamp 312; GFX9-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] 313; GFX9-NEXT: s_movk_i32 s4, 0xff 314; GFX9-NEXT: v_and_b32_sdwa v1, v0, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 315; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 316; GFX9-NEXT: s_setpc_b64 s[30:31] 317; 318; GFX10-LABEL: v_ssubsat_v2i8: 319; GFX10: ; %bb.0: 320; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 321; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 322; GFX10-NEXT: s_mov_b32 s4, 8 323; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff 324; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 325; GFX10-NEXT: v_lshrrev_b32_sdwa v4, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 326; GFX10-NEXT: s_movk_i32 s4, 0xff 327; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 328; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v4 329; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] 330; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] 331; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1 clamp 332; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] 333; GFX10-NEXT: v_and_b32_sdwa v1, v0, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 334; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 335; GFX10-NEXT: s_setpc_b64 s[30:31] 336 %lhs = bitcast i16 %lhs.arg to <2 x i8> 337 %rhs = bitcast i16 %rhs.arg to <2 x i8> 338 %result = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %lhs, <2 
x i8> %rhs) 339 %cast.result = bitcast <2 x i8> %result to i16 340 ret i16 %cast.result 341} 342; s_ssubsat_v2i8 - amdgpu_ps variant with inreg i16 arguments (s0/s1 in the checks) that are bitcast to <2 x i8> for llvm.ssub.sat.v2i8 and bitcast back for the return. Expected GFX6 and GFX8 code is all scalar with one clamp sequence per byte; GFX9 and GFX10 expect s_pack_ll_b32_b16 packing, one v_pk_sub_i16 with clamp, and v_readfirstlane_b32 into s0. 343define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) { 344; GFX6-LABEL: s_ssubsat_v2i8: 345; GFX6: ; %bb.0: 346; GFX6-NEXT: s_lshr_b32 s2, s0, 8 347; GFX6-NEXT: s_lshl_b32 s0, s0, 24 348; GFX6-NEXT: s_brev_b32 s4, -2 349; GFX6-NEXT: s_max_i32 s6, s0, -1 350; GFX6-NEXT: s_lshr_b32 s3, s1, 8 351; GFX6-NEXT: s_lshl_b32 s1, s1, 24 352; GFX6-NEXT: s_brev_b32 s5, 1 353; GFX6-NEXT: s_sub_i32 s6, s6, s4 354; GFX6-NEXT: s_min_i32 s7, s0, -1 355; GFX6-NEXT: s_sub_i32 s7, s7, s5 356; GFX6-NEXT: s_max_i32 s1, s6, s1 357; GFX6-NEXT: s_min_i32 s1, s1, s7 358; GFX6-NEXT: s_sub_i32 s0, s0, s1 359; GFX6-NEXT: s_lshl_b32 s1, s2, 24 360; GFX6-NEXT: s_lshl_b32 s2, s3, 24 361; GFX6-NEXT: s_max_i32 s3, s1, -1 362; GFX6-NEXT: s_sub_i32 s3, s3, s4 363; GFX6-NEXT: s_min_i32 s4, s1, -1 364; GFX6-NEXT: s_sub_i32 s4, s4, s5 365; GFX6-NEXT: s_max_i32 s2, s3, s2 366; GFX6-NEXT: s_min_i32 s2, s2, s4 367; GFX6-NEXT: s_sub_i32 s1, s1, s2 368; GFX6-NEXT: s_ashr_i32 s1, s1, 24 369; GFX6-NEXT: s_movk_i32 s2, 0xff 370; GFX6-NEXT: s_ashr_i32 s0, s0, 24 371; GFX6-NEXT: s_and_b32 s1, s1, s2 372; GFX6-NEXT: s_and_b32 s0, s0, s2 373; GFX6-NEXT: s_lshl_b32 s1, s1, 8 374; GFX6-NEXT: s_or_b32 s0, s0, s1 375; GFX6-NEXT: ; return to shader part epilog 376; 377; GFX8-LABEL: s_ssubsat_v2i8: 378; GFX8: ; %bb.0: 379; GFX8-NEXT: s_bfe_u32 s4, 8, 0x100000 380; GFX8-NEXT: s_lshr_b32 s2, s0, 8 381; GFX8-NEXT: s_lshl_b32 s0, s0, s4 382; GFX8-NEXT: s_sext_i32_i16 s7, s0 383; GFX8-NEXT: s_sext_i32_i16 s8, -1 384; GFX8-NEXT: s_movk_i32 s5, 0x7fff 385; GFX8-NEXT: s_max_i32 s9, s7, s8 386; GFX8-NEXT: s_lshr_b32 s3, s1, 8 387; GFX8-NEXT: s_lshl_b32 s1, s1, s4 388; GFX8-NEXT: s_sub_i32 s9, s9, s5 389; GFX8-NEXT: s_movk_i32 s6, 0x8000 390; GFX8-NEXT: s_min_i32 s7, s7, s8 391; GFX8-NEXT: s_sext_i32_i16 s9, s9 392; GFX8-NEXT: s_sext_i32_i16 s1, s1 393; GFX8-NEXT: s_sub_i32 s7, s7, s6 394; GFX8-NEXT: 
s_max_i32 s1, s9, s1 395; GFX8-NEXT: s_sext_i32_i16 s1, s1 396; GFX8-NEXT: s_sext_i32_i16 s7, s7 397; GFX8-NEXT: s_min_i32 s1, s1, s7 398; GFX8-NEXT: s_sub_i32 s0, s0, s1 399; GFX8-NEXT: s_lshl_b32 s1, s2, s4 400; GFX8-NEXT: s_lshl_b32 s2, s3, s4 401; GFX8-NEXT: s_sext_i32_i16 s3, s1 402; GFX8-NEXT: s_max_i32 s7, s3, s8 403; GFX8-NEXT: s_sub_i32 s5, s7, s5 404; GFX8-NEXT: s_min_i32 s3, s3, s8 405; GFX8-NEXT: s_sext_i32_i16 s5, s5 406; GFX8-NEXT: s_sext_i32_i16 s2, s2 407; GFX8-NEXT: s_sub_i32 s3, s3, s6 408; GFX8-NEXT: s_max_i32 s2, s5, s2 409; GFX8-NEXT: s_sext_i32_i16 s2, s2 410; GFX8-NEXT: s_sext_i32_i16 s3, s3 411; GFX8-NEXT: s_min_i32 s2, s2, s3 412; GFX8-NEXT: s_sub_i32 s1, s1, s2 413; GFX8-NEXT: s_sext_i32_i16 s1, s1 414; GFX8-NEXT: s_sext_i32_i16 s0, s0 415; GFX8-NEXT: s_ashr_i32 s1, s1, s4 416; GFX8-NEXT: s_movk_i32 s2, 0xff 417; GFX8-NEXT: s_ashr_i32 s0, s0, s4 418; GFX8-NEXT: s_and_b32 s1, s1, s2 419; GFX8-NEXT: s_and_b32 s0, s0, s2 420; GFX8-NEXT: s_lshl_b32 s1, s1, s4 421; GFX8-NEXT: s_or_b32 s0, s0, s1 422; GFX8-NEXT: ; return to shader part epilog 423; 424; GFX9-LABEL: s_ssubsat_v2i8: 425; GFX9: ; %bb.0: 426; GFX9-NEXT: s_lshr_b32 s2, s0, 8 427; GFX9-NEXT: s_lshr_b32 s3, s1, 8 428; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 429; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s3 430; GFX9-NEXT: s_mov_b32 s2, 0x80008 431; GFX9-NEXT: s_lshr_b32 s3, s0, 16 432; GFX9-NEXT: s_lshl_b32 s0, s0, s2 433; GFX9-NEXT: s_lshl_b32 s3, s3, 8 434; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3 435; GFX9-NEXT: s_lshr_b32 s3, s1, 16 436; GFX9-NEXT: s_lshl_b32 s1, s1, s2 437; GFX9-NEXT: s_lshl_b32 s2, s3, 8 438; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2 439; GFX9-NEXT: v_mov_b32_e32 v0, s1 440; GFX9-NEXT: v_pk_sub_i16 v0, s0, v0 clamp 441; GFX9-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] 442; GFX9-NEXT: s_movk_i32 s0, 0xff 443; GFX9-NEXT: v_and_b32_sdwa v1, v0, s0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 444; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 445; GFX9-NEXT: v_readfirstlane_b32 s0, v0 446; GFX9-NEXT: ; return to shader part epilog 447; 448; GFX10-LABEL: s_ssubsat_v2i8: 449; GFX10: ; %bb.0: 450; GFX10-NEXT: s_lshr_b32 s2, s0, 8 451; GFX10-NEXT: s_lshr_b32 s3, s1, 8 452; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 453; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s3 454; GFX10-NEXT: s_mov_b32 s2, 0x80008 455; GFX10-NEXT: s_lshr_b32 s3, s0, 16 456; GFX10-NEXT: s_lshr_b32 s4, s1, 16 457; GFX10-NEXT: s_lshl_b32 s0, s0, s2 458; GFX10-NEXT: s_lshl_b32 s3, s3, 8 459; GFX10-NEXT: s_lshl_b32 s1, s1, s2 460; GFX10-NEXT: s_lshl_b32 s2, s4, 8 461; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s3 462; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s2 463; GFX10-NEXT: v_pk_sub_i16 v0, s0, s1 clamp 464; GFX10-NEXT: s_movk_i32 s0, 0xff 465; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] 466; GFX10-NEXT: v_and_b32_sdwa v1, v0, s0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 467; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 468; GFX10-NEXT: v_readfirstlane_b32 s0, v0 469; GFX10-NEXT: ; return to shader part epilog 470 %lhs = bitcast i16 %lhs.arg to <2 x i8> 471 %rhs = bitcast i16 %rhs.arg to <2 x i8> 472 %result = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %lhs, <2 x i8> %rhs) 473 %cast.result = bitcast <2 x i8> %result to i16 474 ret i16 %cast.result 475} 476; v_ssubsat_v4i8 - the i32 arguments (v0/v1 in the checks) are bitcast to <4 x i8>, passed to llvm.ssub.sat.v4i8, and the result is bitcast back to i32. Expected GFX6 and GFX8 code runs the max/min clamp sequence once per byte and reassembles the four results with a 0xff mask; GFX9 and GFX10 expect two v_pk_sub_i16 with clamp, each covering two bytes. 477define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { 478; GFX6-LABEL: v_ssubsat_v4i8: 479; GFX6: ; %bb.0: 480; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 481; GFX6-NEXT: v_lshrrev_b32_e32 v2, 8, v0 482; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v0 483; GFX6-NEXT: v_lshrrev_b32_e32 v4, 24, v0 484; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 485; GFX6-NEXT: s_brev_b32 s4, -2 486; GFX6-NEXT: v_max_i32_e32 v8, -1, v0 487; GFX6-NEXT: v_lshrrev_b32_e32 v5, 8, v1 488; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v1 489; GFX6-NEXT: v_lshrrev_b32_e32 v7, 24, v1 
490; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 491; GFX6-NEXT: s_brev_b32 s5, 1 492; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s4, v8 493; GFX6-NEXT: v_min_i32_e32 v10, -1, v0 494; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, s5, v10 495; GFX6-NEXT: v_max_i32_e32 v1, v8, v1 496; GFX6-NEXT: v_min_i32_e32 v1, v1, v10 497; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 498; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2 499; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v5 500; GFX6-NEXT: v_max_i32_e32 v5, -1, v1 501; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s4, v5 502; GFX6-NEXT: v_min_i32_e32 v8, -1, v1 503; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s5, v8 504; GFX6-NEXT: v_max_i32_e32 v2, v5, v2 505; GFX6-NEXT: v_min_i32_e32 v2, v2, v8 506; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 507; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3 508; GFX6-NEXT: v_bfrev_b32_e32 v9, -2 509; GFX6-NEXT: v_max_i32_e32 v5, -1, v2 510; GFX6-NEXT: v_lshlrev_b32_e32 v3, 24, v6 511; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9 512; GFX6-NEXT: v_min_i32_e32 v6, -1, v2 513; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, s5, v6 514; GFX6-NEXT: v_max_i32_e32 v3, v5, v3 515; GFX6-NEXT: v_min_i32_e32 v3, v3, v6 516; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 517; GFX6-NEXT: v_lshlrev_b32_e32 v3, 24, v4 518; GFX6-NEXT: v_max_i32_e32 v5, -1, v3 519; GFX6-NEXT: v_bfrev_b32_e32 v11, 1 520; GFX6-NEXT: v_ashrrev_i32_e32 v1, 24, v1 521; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v7 522; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9 523; GFX6-NEXT: v_min_i32_e32 v6, -1, v3 524; GFX6-NEXT: s_movk_i32 s4, 0xff 525; GFX6-NEXT: v_ashrrev_i32_e32 v0, 24, v0 526; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v11 527; GFX6-NEXT: v_max_i32_e32 v4, v5, v4 528; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 529; GFX6-NEXT: v_ashrrev_i32_e32 v2, 24, v2 530; GFX6-NEXT: v_min_i32_e32 v4, v4, v6 531; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 532; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 533; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v4 534; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 535; GFX6-NEXT: v_and_b32_e32 v1, s4, v2 536; 
GFX6-NEXT: v_ashrrev_i32_e32 v3, 24, v3 537; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 538; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 539; GFX6-NEXT: v_and_b32_e32 v1, s4, v3 540; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 541; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 542; GFX6-NEXT: s_setpc_b64 s[30:31] 543; 544; GFX8-LABEL: v_ssubsat_v4i8: 545; GFX8: ; %bb.0: 546; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 547; GFX8-NEXT: v_mov_b32_e32 v2, 8 548; GFX8-NEXT: v_lshrrev_b32_sdwa v3, v2, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 549; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v0 550; GFX8-NEXT: v_lshrrev_b32_e32 v5, 24, v0 551; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0 552; GFX8-NEXT: s_movk_i32 s4, 0x7fff 553; GFX8-NEXT: v_max_i16_e32 v8, -1, v0 554; GFX8-NEXT: v_lshrrev_b32_sdwa v2, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 555; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v1 556; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v1 557; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 558; GFX8-NEXT: s_movk_i32 s5, 0x8000 559; GFX8-NEXT: v_subrev_u16_e32 v8, s4, v8 560; GFX8-NEXT: v_min_i16_e32 v10, -1, v0 561; GFX8-NEXT: v_subrev_u16_e32 v10, s5, v10 562; GFX8-NEXT: v_max_i16_e32 v1, v8, v1 563; GFX8-NEXT: v_min_i16_e32 v1, v1, v10 564; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1 565; GFX8-NEXT: v_max_i16_e32 v1, -1, v3 566; GFX8-NEXT: v_subrev_u16_e32 v1, s4, v1 567; GFX8-NEXT: v_min_i16_e32 v8, -1, v3 568; GFX8-NEXT: v_subrev_u16_e32 v8, s5, v8 569; GFX8-NEXT: v_max_i16_e32 v1, v1, v2 570; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v4 571; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fff 572; GFX8-NEXT: v_min_i16_e32 v1, v1, v8 573; GFX8-NEXT: v_max_i16_e32 v4, -1, v2 574; GFX8-NEXT: v_sub_u16_e32 v1, v3, v1 575; GFX8-NEXT: v_lshlrev_b16_e32 v3, 8, v6 576; GFX8-NEXT: v_sub_u16_e32 v4, v4, v9 577; GFX8-NEXT: v_min_i16_e32 v6, -1, v2 578; GFX8-NEXT: v_subrev_u16_e32 v6, s5, v6 579; GFX8-NEXT: v_max_i16_e32 v3, v4, v3 580; GFX8-NEXT: v_min_i16_e32 v3, v3, v6 581; GFX8-NEXT: 
v_sub_u16_e32 v2, v2, v3 582; GFX8-NEXT: v_lshlrev_b16_e32 v3, 8, v5 583; GFX8-NEXT: v_max_i16_e32 v5, -1, v3 584; GFX8-NEXT: v_lshlrev_b16_e32 v4, 8, v7 585; GFX8-NEXT: v_sub_u16_e32 v5, v5, v9 586; GFX8-NEXT: v_min_i16_e32 v6, -1, v3 587; GFX8-NEXT: v_subrev_u16_e32 v6, 0x8000, v6 588; GFX8-NEXT: v_max_i16_e32 v4, v5, v4 589; GFX8-NEXT: v_min_i16_e32 v4, v4, v6 590; GFX8-NEXT: v_sub_u16_e32 v3, v3, v4 591; GFX8-NEXT: v_mov_b32_e32 v4, 0xff 592; GFX8-NEXT: v_and_b32_sdwa v1, sext(v1), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD 593; GFX8-NEXT: v_and_b32_sdwa v0, sext(v0), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD 594; GFX8-NEXT: v_lshlrev_b32_e32 v1, 8, v1 595; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 596; GFX8-NEXT: v_and_b32_sdwa v1, sext(v2), v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD 597; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 598; GFX8-NEXT: v_and_b32_sdwa v1, sext(v3), v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD 599; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 600; GFX8-NEXT: s_setpc_b64 s[30:31] 601; 602; GFX9-LABEL: v_ssubsat_v4i8: 603; GFX9: ; %bb.0: 604; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 605; GFX9-NEXT: s_mov_b32 s4, 8 606; GFX9-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 607; GFX9-NEXT: v_lshrrev_b32_e32 v4, 24, v0 608; GFX9-NEXT: v_mov_b32_e32 v8, 0xffff 609; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 610; GFX9-NEXT: v_lshrrev_b32_sdwa v5, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 611; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 612; GFX9-NEXT: v_and_or_b32 v0, v0, v8, v2 613; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v4 614; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1 615; GFX9-NEXT: v_and_or_b32 v2, v3, v8, v2 616; GFX9-NEXT: v_and_or_b32 v1, v1, v8, v5 617; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v7 618; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] 619; GFX9-NEXT: 
v_and_or_b32 v3, v6, v8, v3 620; GFX9-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] 621; GFX9-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] 622; GFX9-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] 623; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 clamp 624; GFX9-NEXT: v_pk_sub_i16 v1, v2, v3 clamp 625; GFX9-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] 626; GFX9-NEXT: v_mov_b32_e32 v2, 8 627; GFX9-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] 628; GFX9-NEXT: s_movk_i32 s4, 0xff 629; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 630; GFX9-NEXT: v_and_or_b32 v0, v0, s4, v2 631; GFX9-NEXT: v_and_b32_e32 v2, s4, v1 632; GFX9-NEXT: v_mov_b32_e32 v3, 24 633; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 634; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 635; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 636; GFX9-NEXT: s_setpc_b64 s[30:31] 637; 638; GFX10-LABEL: v_ssubsat_v4i8: 639; GFX10: ; %bb.0: 640; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 641; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 642; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 643; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 644; GFX10-NEXT: s_mov_b32 s4, 8 645; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 646; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 647; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 648; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff 649; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 650; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 651; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 652; GFX10-NEXT: s_movk_i32 s4, 0xff 653; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 654; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 655; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 656; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 657; GFX10-NEXT: v_mov_b32_e32 v4, 24 658; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 
op_sel_hi:[0,1] 659; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] 660; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] 661; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] 662; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1 clamp 663; GFX10-NEXT: v_pk_sub_i16 v1, v2, v3 clamp 664; GFX10-NEXT: v_mov_b32_e32 v2, 8 665; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] 666; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] 667; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 668; GFX10-NEXT: v_and_b32_e32 v3, s4, v1 669; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 670; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v2 671; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v3 672; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 673; GFX10-NEXT: s_setpc_b64 s[30:31] 674 %lhs = bitcast i32 %lhs.arg to <4 x i8> 675 %rhs = bitcast i32 %rhs.arg to <4 x i8> 676 %result = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %lhs, <4 x i8> %rhs) 677 %cast.result = bitcast <4 x i8> %result to i32 678 ret i32 %cast.result 679} 680 681define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) { 682; GFX6-LABEL: s_ssubsat_v4i8: 683; GFX6: ; %bb.0: 684; GFX6-NEXT: s_lshr_b32 s2, s0, 8 685; GFX6-NEXT: s_lshr_b32 s3, s0, 16 686; GFX6-NEXT: s_lshr_b32 s4, s0, 24 687; GFX6-NEXT: s_lshl_b32 s0, s0, 24 688; GFX6-NEXT: s_brev_b32 s8, -2 689; GFX6-NEXT: s_max_i32 s10, s0, -1 690; GFX6-NEXT: s_lshr_b32 s5, s1, 8 691; GFX6-NEXT: s_lshr_b32 s6, s1, 16 692; GFX6-NEXT: s_lshr_b32 s7, s1, 24 693; GFX6-NEXT: s_lshl_b32 s1, s1, 24 694; GFX6-NEXT: s_brev_b32 s9, 1 695; GFX6-NEXT: s_sub_i32 s10, s10, s8 696; GFX6-NEXT: s_min_i32 s11, s0, -1 697; GFX6-NEXT: s_sub_i32 s11, s11, s9 698; GFX6-NEXT: s_max_i32 s1, s10, s1 699; GFX6-NEXT: s_min_i32 s1, s1, s11 700; GFX6-NEXT: s_sub_i32 s0, s0, s1 701; GFX6-NEXT: s_lshl_b32 s1, s2, 24 702; GFX6-NEXT: s_lshl_b32 s2, s5, 24 703; 
GFX6-NEXT: s_max_i32 s5, s1, -1 704; GFX6-NEXT: s_sub_i32 s5, s5, s8 705; GFX6-NEXT: s_min_i32 s10, s1, -1 706; GFX6-NEXT: s_sub_i32 s10, s10, s9 707; GFX6-NEXT: s_max_i32 s2, s5, s2 708; GFX6-NEXT: s_min_i32 s2, s2, s10 709; GFX6-NEXT: s_sub_i32 s1, s1, s2 710; GFX6-NEXT: s_lshl_b32 s2, s3, 24 711; GFX6-NEXT: s_max_i32 s5, s2, -1 712; GFX6-NEXT: s_lshl_b32 s3, s6, 24 713; GFX6-NEXT: s_sub_i32 s5, s5, s8 714; GFX6-NEXT: s_min_i32 s6, s2, -1 715; GFX6-NEXT: s_sub_i32 s6, s6, s9 716; GFX6-NEXT: s_max_i32 s3, s5, s3 717; GFX6-NEXT: s_min_i32 s3, s3, s6 718; GFX6-NEXT: s_sub_i32 s2, s2, s3 719; GFX6-NEXT: s_lshl_b32 s3, s4, 24 720; GFX6-NEXT: s_max_i32 s5, s3, -1 721; GFX6-NEXT: s_lshl_b32 s4, s7, 24 722; GFX6-NEXT: s_sub_i32 s5, s5, s8 723; GFX6-NEXT: s_min_i32 s6, s3, -1 724; GFX6-NEXT: s_sub_i32 s6, s6, s9 725; GFX6-NEXT: s_max_i32 s4, s5, s4 726; GFX6-NEXT: s_min_i32 s4, s4, s6 727; GFX6-NEXT: s_ashr_i32 s1, s1, 24 728; GFX6-NEXT: s_sub_i32 s3, s3, s4 729; GFX6-NEXT: s_movk_i32 s4, 0xff 730; GFX6-NEXT: s_ashr_i32 s0, s0, 24 731; GFX6-NEXT: s_and_b32 s1, s1, s4 732; GFX6-NEXT: s_ashr_i32 s2, s2, 24 733; GFX6-NEXT: s_and_b32 s0, s0, s4 734; GFX6-NEXT: s_lshl_b32 s1, s1, 8 735; GFX6-NEXT: s_or_b32 s0, s0, s1 736; GFX6-NEXT: s_and_b32 s1, s2, s4 737; GFX6-NEXT: s_ashr_i32 s3, s3, 24 738; GFX6-NEXT: s_lshl_b32 s1, s1, 16 739; GFX6-NEXT: s_or_b32 s0, s0, s1 740; GFX6-NEXT: s_and_b32 s1, s3, s4 741; GFX6-NEXT: s_lshl_b32 s1, s1, 24 742; GFX6-NEXT: s_or_b32 s0, s0, s1 743; GFX6-NEXT: ; return to shader part epilog 744; 745; GFX8-LABEL: s_ssubsat_v4i8: 746; GFX8: ; %bb.0: 747; GFX8-NEXT: s_bfe_u32 s8, 8, 0x100000 748; GFX8-NEXT: s_lshr_b32 s2, s0, 8 749; GFX8-NEXT: s_lshr_b32 s3, s0, 16 750; GFX8-NEXT: s_lshr_b32 s4, s0, 24 751; GFX8-NEXT: s_lshl_b32 s0, s0, s8 752; GFX8-NEXT: s_sext_i32_i16 s11, s0 753; GFX8-NEXT: s_sext_i32_i16 s12, -1 754; GFX8-NEXT: s_movk_i32 s9, 0x7fff 755; GFX8-NEXT: s_max_i32 s13, s11, s12 756; GFX8-NEXT: s_lshr_b32 s5, s1, 8 757; GFX8-NEXT: 
s_lshr_b32 s6, s1, 16 758; GFX8-NEXT: s_lshr_b32 s7, s1, 24 759; GFX8-NEXT: s_lshl_b32 s1, s1, s8 760; GFX8-NEXT: s_sub_i32 s13, s13, s9 761; GFX8-NEXT: s_movk_i32 s10, 0x8000 762; GFX8-NEXT: s_min_i32 s11, s11, s12 763; GFX8-NEXT: s_sext_i32_i16 s13, s13 764; GFX8-NEXT: s_sext_i32_i16 s1, s1 765; GFX8-NEXT: s_sub_i32 s11, s11, s10 766; GFX8-NEXT: s_max_i32 s1, s13, s1 767; GFX8-NEXT: s_sext_i32_i16 s1, s1 768; GFX8-NEXT: s_sext_i32_i16 s11, s11 769; GFX8-NEXT: s_min_i32 s1, s1, s11 770; GFX8-NEXT: s_sub_i32 s0, s0, s1 771; GFX8-NEXT: s_lshl_b32 s1, s2, s8 772; GFX8-NEXT: s_lshl_b32 s2, s5, s8 773; GFX8-NEXT: s_sext_i32_i16 s5, s1 774; GFX8-NEXT: s_max_i32 s11, s5, s12 775; GFX8-NEXT: s_sub_i32 s11, s11, s9 776; GFX8-NEXT: s_min_i32 s5, s5, s12 777; GFX8-NEXT: s_sext_i32_i16 s11, s11 778; GFX8-NEXT: s_sext_i32_i16 s2, s2 779; GFX8-NEXT: s_sub_i32 s5, s5, s10 780; GFX8-NEXT: s_max_i32 s2, s11, s2 781; GFX8-NEXT: s_sext_i32_i16 s2, s2 782; GFX8-NEXT: s_sext_i32_i16 s5, s5 783; GFX8-NEXT: s_min_i32 s2, s2, s5 784; GFX8-NEXT: s_sub_i32 s1, s1, s2 785; GFX8-NEXT: s_lshl_b32 s2, s3, s8 786; GFX8-NEXT: s_sext_i32_i16 s5, s2 787; GFX8-NEXT: s_lshl_b32 s3, s6, s8 788; GFX8-NEXT: s_max_i32 s6, s5, s12 789; GFX8-NEXT: s_sub_i32 s6, s6, s9 790; GFX8-NEXT: s_min_i32 s5, s5, s12 791; GFX8-NEXT: s_sext_i32_i16 s6, s6 792; GFX8-NEXT: s_sext_i32_i16 s3, s3 793; GFX8-NEXT: s_sub_i32 s5, s5, s10 794; GFX8-NEXT: s_max_i32 s3, s6, s3 795; GFX8-NEXT: s_sext_i32_i16 s3, s3 796; GFX8-NEXT: s_sext_i32_i16 s5, s5 797; GFX8-NEXT: s_min_i32 s3, s3, s5 798; GFX8-NEXT: s_sub_i32 s2, s2, s3 799; GFX8-NEXT: s_lshl_b32 s3, s4, s8 800; GFX8-NEXT: s_sext_i32_i16 s5, s3 801; GFX8-NEXT: s_max_i32 s6, s5, s12 802; GFX8-NEXT: s_lshl_b32 s4, s7, s8 803; GFX8-NEXT: s_sub_i32 s6, s6, s9 804; GFX8-NEXT: s_min_i32 s5, s5, s12 805; GFX8-NEXT: s_sext_i32_i16 s6, s6 806; GFX8-NEXT: s_sext_i32_i16 s4, s4 807; GFX8-NEXT: s_sub_i32 s5, s5, s10 808; GFX8-NEXT: s_max_i32 s4, s6, s4 809; GFX8-NEXT: s_sext_i32_i16 s4, 
s4 810; GFX8-NEXT: s_sext_i32_i16 s5, s5 811; GFX8-NEXT: s_sext_i32_i16 s1, s1 812; GFX8-NEXT: s_min_i32 s4, s4, s5 813; GFX8-NEXT: s_sext_i32_i16 s0, s0 814; GFX8-NEXT: s_ashr_i32 s1, s1, s8 815; GFX8-NEXT: s_sub_i32 s3, s3, s4 816; GFX8-NEXT: s_movk_i32 s4, 0xff 817; GFX8-NEXT: s_ashr_i32 s0, s0, s8 818; GFX8-NEXT: s_sext_i32_i16 s2, s2 819; GFX8-NEXT: s_and_b32 s1, s1, s4 820; GFX8-NEXT: s_ashr_i32 s2, s2, s8 821; GFX8-NEXT: s_and_b32 s0, s0, s4 822; GFX8-NEXT: s_lshl_b32 s1, s1, 8 823; GFX8-NEXT: s_sext_i32_i16 s3, s3 824; GFX8-NEXT: s_or_b32 s0, s0, s1 825; GFX8-NEXT: s_and_b32 s1, s2, s4 826; GFX8-NEXT: s_ashr_i32 s3, s3, s8 827; GFX8-NEXT: s_lshl_b32 s1, s1, 16 828; GFX8-NEXT: s_or_b32 s0, s0, s1 829; GFX8-NEXT: s_and_b32 s1, s3, s4 830; GFX8-NEXT: s_lshl_b32 s1, s1, 24 831; GFX8-NEXT: s_or_b32 s0, s0, s1 832; GFX8-NEXT: ; return to shader part epilog 833; 834; GFX9-LABEL: s_ssubsat_v4i8: 835; GFX9: ; %bb.0: 836; GFX9-NEXT: s_lshr_b32 s3, s0, 8 837; GFX9-NEXT: s_lshr_b32 s4, s0, 16 838; GFX9-NEXT: s_lshr_b32 s6, s0, 24 839; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3 840; GFX9-NEXT: s_pack_ll_b32_b16 s3, s4, s6 841; GFX9-NEXT: s_mov_b32 s4, 0x80008 842; GFX9-NEXT: s_lshr_b32 s6, s0, 16 843; GFX9-NEXT: s_lshr_b32 s7, s1, 8 844; GFX9-NEXT: s_lshl_b32 s0, s0, s4 845; GFX9-NEXT: s_lshl_b32 s6, s6, 8 846; GFX9-NEXT: s_lshr_b32 s8, s1, 16 847; GFX9-NEXT: s_lshr_b32 s9, s1, 24 848; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s6 849; GFX9-NEXT: s_lshr_b32 s6, s3, 16 850; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s7 851; GFX9-NEXT: s_lshl_b32 s3, s3, s4 852; GFX9-NEXT: s_lshl_b32 s6, s6, 8 853; GFX9-NEXT: s_lshr_b32 s7, s1, 16 854; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s6 855; GFX9-NEXT: s_pack_ll_b32_b16 s6, s8, s9 856; GFX9-NEXT: s_lshl_b32 s1, s1, s4 857; GFX9-NEXT: s_lshl_b32 s7, s7, 8 858; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s7 859; GFX9-NEXT: s_lshr_b32 s7, s6, 16 860; GFX9-NEXT: s_lshl_b32 s4, s6, s4 861; GFX9-NEXT: s_lshl_b32 s6, s7, 8 862; GFX9-NEXT: s_pack_ll_b32_b16 s4, 
s4, s6 863; GFX9-NEXT: v_mov_b32_e32 v0, s1 864; GFX9-NEXT: v_pk_sub_i16 v0, s0, v0 clamp 865; GFX9-NEXT: v_mov_b32_e32 v1, s4 866; GFX9-NEXT: s_mov_b32 s2, 8 867; GFX9-NEXT: v_pk_sub_i16 v1, s3, v1 clamp 868; GFX9-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] 869; GFX9-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] 870; GFX9-NEXT: s_movk_i32 s0, 0xff 871; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 872; GFX9-NEXT: s_mov_b32 s5, 24 873; GFX9-NEXT: v_and_or_b32 v0, v0, s0, v2 874; GFX9-NEXT: v_and_b32_e32 v2, s0, v1 875; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 876; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 877; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 878; GFX9-NEXT: v_readfirstlane_b32 s0, v0 879; GFX9-NEXT: ; return to shader part epilog 880; 881; GFX10-LABEL: s_ssubsat_v4i8: 882; GFX10: ; %bb.0: 883; GFX10-NEXT: s_lshr_b32 s2, s0, 8 884; GFX10-NEXT: s_lshr_b32 s3, s0, 16 885; GFX10-NEXT: s_lshr_b32 s4, s0, 24 886; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 887; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4 888; GFX10-NEXT: s_mov_b32 s3, 0x80008 889; GFX10-NEXT: s_lshr_b32 s4, s0, 16 890; GFX10-NEXT: s_lshr_b32 s5, s1, 8 891; GFX10-NEXT: s_lshr_b32 s6, s1, 16 892; GFX10-NEXT: s_lshr_b32 s7, s1, 24 893; GFX10-NEXT: s_lshl_b32 s0, s0, s3 894; GFX10-NEXT: s_lshl_b32 s4, s4, 8 895; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5 896; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 897; GFX10-NEXT: s_pack_ll_b32_b16 s4, s6, s7 898; GFX10-NEXT: s_lshr_b32 s8, s2, 16 899; GFX10-NEXT: s_lshr_b32 s5, s1, 16 900; GFX10-NEXT: s_lshr_b32 s6, s4, 16 901; GFX10-NEXT: s_lshl_b32 s2, s2, s3 902; GFX10-NEXT: s_lshl_b32 s8, s8, 8 903; GFX10-NEXT: s_lshl_b32 s1, s1, s3 904; GFX10-NEXT: s_lshl_b32 s5, s5, 8 905; GFX10-NEXT: s_lshl_b32 s3, s4, s3 906; GFX10-NEXT: s_lshl_b32 s4, s6, 8 907; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s8 908; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s5 
909; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s4 910; GFX10-NEXT: v_pk_sub_i16 v0, s0, s1 clamp 911; GFX10-NEXT: v_pk_sub_i16 v1, s2, s3 clamp 912; GFX10-NEXT: s_mov_b32 s0, 8 913; GFX10-NEXT: s_movk_i32 s1, 0xff 914; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] 915; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] 916; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 917; GFX10-NEXT: v_and_b32_e32 v3, s1, v1 918; GFX10-NEXT: s_mov_b32 s0, 24 919; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 920; GFX10-NEXT: v_and_or_b32 v0, v0, s1, v2 921; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v3 922; GFX10-NEXT: v_or3_b32 v0, v0, v2, v1 923; GFX10-NEXT: v_readfirstlane_b32 s0, v0 924; GFX10-NEXT: ; return to shader part epilog 925 %lhs = bitcast i32 %lhs.arg to <4 x i8> 926 %rhs = bitcast i32 %rhs.arg to <4 x i8> 927 %result = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %lhs, <4 x i8> %rhs) 928 %cast.result = bitcast <4 x i8> %result to i32 929 ret i32 %cast.result 930} 931 932define i24 @v_ssubsat_i24(i24 %lhs, i24 %rhs) { 933; GFX6-LABEL: v_ssubsat_i24: 934; GFX6: ; %bb.0: 935; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 936; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0 937; GFX6-NEXT: v_max_i32_e32 v2, -1, v0 938; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 939; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2 940; GFX6-NEXT: v_min_i32_e32 v3, -1, v0 941; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3 942; GFX6-NEXT: v_max_i32_e32 v1, v2, v1 943; GFX6-NEXT: v_min_i32_e32 v1, v1, v3 944; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 945; GFX6-NEXT: v_ashrrev_i32_e32 v0, 8, v0 946; GFX6-NEXT: s_setpc_b64 s[30:31] 947; 948; GFX8-LABEL: v_ssubsat_i24: 949; GFX8: ; %bb.0: 950; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 951; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v0, v1 952; GFX8-NEXT: v_bfe_i32 v3, v2, 0, 24 953; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 
24 954; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0 955; GFX8-NEXT: v_bfe_i32 v0, v1, 0, 24 956; GFX8-NEXT: v_cmp_lt_i32_e64 s[6:7], 0, v0 957; GFX8-NEXT: v_ashrrev_i32_e32 v0, 23, v3 958; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0xff800000, v0 959; GFX8-NEXT: s_xor_b64 vcc, s[6:7], s[4:5] 960; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 961; GFX8-NEXT: s_setpc_b64 s[30:31] 962; 963; GFX9-LABEL: v_ssubsat_i24: 964; GFX9: ; %bb.0: 965; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 966; GFX9-NEXT: v_lshlrev_b32_e32 v0, 8, v0 967; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v1 968; GFX9-NEXT: v_sub_i32 v0, v0, v1 clamp 969; GFX9-NEXT: v_ashrrev_i32_e32 v0, 8, v0 970; GFX9-NEXT: s_setpc_b64 s[30:31] 971; 972; GFX10-LABEL: v_ssubsat_i24: 973; GFX10: ; %bb.0: 974; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 975; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 976; GFX10-NEXT: v_lshlrev_b32_e32 v0, 8, v0 977; GFX10-NEXT: v_lshlrev_b32_e32 v1, 8, v1 978; GFX10-NEXT: v_sub_nc_i32 v0, v0, v1 clamp 979; GFX10-NEXT: v_ashrrev_i32_e32 v0, 8, v0 980; GFX10-NEXT: s_setpc_b64 s[30:31] 981 %result = call i24 @llvm.ssub.sat.i24(i24 %lhs, i24 %rhs) 982 ret i24 %result 983} 984 985define amdgpu_ps i24 @s_ssubsat_i24(i24 inreg %lhs, i24 inreg %rhs) { 986; GFX6-LABEL: s_ssubsat_i24: 987; GFX6: ; %bb.0: 988; GFX6-NEXT: s_lshl_b32 s0, s0, 8 989; GFX6-NEXT: s_max_i32 s2, s0, -1 990; GFX6-NEXT: s_lshl_b32 s1, s1, 8 991; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff 992; GFX6-NEXT: s_min_i32 s3, s0, -1 993; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000 994; GFX6-NEXT: s_max_i32 s1, s2, s1 995; GFX6-NEXT: s_min_i32 s1, s1, s3 996; GFX6-NEXT: s_sub_i32 s0, s0, s1 997; GFX6-NEXT: s_ashr_i32 s0, s0, 8 998; GFX6-NEXT: ; return to shader part epilog 999; 1000; GFX8-LABEL: s_ssubsat_i24: 1001; GFX8: ; %bb.0: 1002; GFX8-NEXT: s_sub_i32 s2, s0, s1 1003; GFX8-NEXT: s_bfe_i32 s3, s2, 0x180000 1004; GFX8-NEXT: s_bfe_i32 s0, s0, 0x180000 1005; GFX8-NEXT: s_cmp_lt_i32 s3, s0 1006; GFX8-NEXT: s_cselect_b32 s0, 1, 0 1007; 
GFX8-NEXT: s_bfe_i32 s1, s1, 0x180000 1008; GFX8-NEXT: s_cmp_gt_i32 s1, 0 1009; GFX8-NEXT: s_cselect_b32 s1, 1, 0 1010; GFX8-NEXT: s_xor_b32 s0, s1, s0 1011; GFX8-NEXT: s_ashr_i32 s1, s3, 23 1012; GFX8-NEXT: s_add_i32 s1, s1, 0xff800000 1013; GFX8-NEXT: s_and_b32 s0, s0, 1 1014; GFX8-NEXT: s_cmp_lg_u32 s0, 0 1015; GFX8-NEXT: s_cselect_b32 s0, s1, s2 1016; GFX8-NEXT: ; return to shader part epilog 1017; 1018; GFX9-LABEL: s_ssubsat_i24: 1019; GFX9: ; %bb.0: 1020; GFX9-NEXT: s_lshl_b32 s1, s1, 8 1021; GFX9-NEXT: s_lshl_b32 s0, s0, 8 1022; GFX9-NEXT: v_mov_b32_e32 v0, s1 1023; GFX9-NEXT: v_sub_i32 v0, s0, v0 clamp 1024; GFX9-NEXT: v_ashrrev_i32_e32 v0, 8, v0 1025; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1026; GFX9-NEXT: ; return to shader part epilog 1027; 1028; GFX10-LABEL: s_ssubsat_i24: 1029; GFX10: ; %bb.0: 1030; GFX10-NEXT: s_lshl_b32 s0, s0, 8 1031; GFX10-NEXT: s_lshl_b32 s1, s1, 8 1032; GFX10-NEXT: v_sub_nc_i32 v0, s0, s1 clamp 1033; GFX10-NEXT: v_ashrrev_i32_e32 v0, 8, v0 1034; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1035; GFX10-NEXT: ; return to shader part epilog 1036 %result = call i24 @llvm.ssub.sat.i24(i24 %lhs, i24 %rhs) 1037 ret i24 %result 1038} 1039 1040define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) { 1041; GFX6-LABEL: v_ssubsat_i32: 1042; GFX6: ; %bb.0: 1043; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1044; GFX6-NEXT: v_max_i32_e32 v2, -1, v0 1045; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2 1046; GFX6-NEXT: v_min_i32_e32 v3, -1, v0 1047; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3 1048; GFX6-NEXT: v_max_i32_e32 v1, v2, v1 1049; GFX6-NEXT: v_min_i32_e32 v1, v1, v3 1050; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 1051; GFX6-NEXT: s_setpc_b64 s[30:31] 1052; 1053; GFX8-LABEL: v_ssubsat_i32: 1054; GFX8: ; %bb.0: 1055; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1056; GFX8-NEXT: v_max_i32_e32 v2, -1, v0 1057; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 0x7fffffff, v2 1058; GFX8-NEXT: v_min_i32_e32 v3, -1, v0 1059; GFX8-NEXT: 
v_subrev_u32_e32 v3, vcc, 0x80000000, v3 1060; GFX8-NEXT: v_max_i32_e32 v1, v2, v1 1061; GFX8-NEXT: v_min_i32_e32 v1, v1, v3 1062; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v1 1063; GFX8-NEXT: s_setpc_b64 s[30:31] 1064; 1065; GFX9-LABEL: v_ssubsat_i32: 1066; GFX9: ; %bb.0: 1067; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1068; GFX9-NEXT: v_sub_i32 v0, v0, v1 clamp 1069; GFX9-NEXT: s_setpc_b64 s[30:31] 1070; 1071; GFX10-LABEL: v_ssubsat_i32: 1072; GFX10: ; %bb.0: 1073; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1074; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1075; GFX10-NEXT: v_sub_nc_i32 v0, v0, v1 clamp 1076; GFX10-NEXT: s_setpc_b64 s[30:31] 1077 %result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs) 1078 ret i32 %result 1079} 1080 1081define amdgpu_ps i32 @s_ssubsat_i32(i32 inreg %lhs, i32 inreg %rhs) { 1082; GFX6-LABEL: s_ssubsat_i32: 1083; GFX6: ; %bb.0: 1084; GFX6-NEXT: s_max_i32 s2, s0, -1 1085; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff 1086; GFX6-NEXT: s_min_i32 s3, s0, -1 1087; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000 1088; GFX6-NEXT: s_max_i32 s1, s2, s1 1089; GFX6-NEXT: s_min_i32 s1, s1, s3 1090; GFX6-NEXT: s_sub_i32 s0, s0, s1 1091; GFX6-NEXT: ; return to shader part epilog 1092; 1093; GFX8-LABEL: s_ssubsat_i32: 1094; GFX8: ; %bb.0: 1095; GFX8-NEXT: s_max_i32 s2, s0, -1 1096; GFX8-NEXT: s_sub_i32 s2, s2, 0x7fffffff 1097; GFX8-NEXT: s_min_i32 s3, s0, -1 1098; GFX8-NEXT: s_sub_i32 s3, s3, 0x80000000 1099; GFX8-NEXT: s_max_i32 s1, s2, s1 1100; GFX8-NEXT: s_min_i32 s1, s1, s3 1101; GFX8-NEXT: s_sub_i32 s0, s0, s1 1102; GFX8-NEXT: ; return to shader part epilog 1103; 1104; GFX9-LABEL: s_ssubsat_i32: 1105; GFX9: ; %bb.0: 1106; GFX9-NEXT: v_mov_b32_e32 v0, s1 1107; GFX9-NEXT: v_sub_i32 v0, s0, v0 clamp 1108; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1109; GFX9-NEXT: ; return to shader part epilog 1110; 1111; GFX10-LABEL: s_ssubsat_i32: 1112; GFX10: ; %bb.0: 1113; GFX10-NEXT: v_sub_nc_i32 v0, s0, s1 clamp 1114; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1115; 
GFX10-NEXT: ; return to shader part epilog 1116 %result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs) 1117 ret i32 %result 1118} 1119 1120define amdgpu_ps float @ssubsat_i32_sv(i32 inreg %lhs, i32 %rhs) { 1121; GFX6-LABEL: ssubsat_i32_sv: 1122; GFX6: ; %bb.0: 1123; GFX6-NEXT: s_max_i32 s1, s0, -1 1124; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff 1125; GFX6-NEXT: s_min_i32 s2, s0, -1 1126; GFX6-NEXT: s_sub_i32 s2, s2, 0x80000000 1127; GFX6-NEXT: v_max_i32_e32 v0, s1, v0 1128; GFX6-NEXT: v_min_i32_e32 v0, s2, v0 1129; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 1130; GFX6-NEXT: ; return to shader part epilog 1131; 1132; GFX8-LABEL: ssubsat_i32_sv: 1133; GFX8: ; %bb.0: 1134; GFX8-NEXT: s_max_i32 s1, s0, -1 1135; GFX8-NEXT: s_sub_i32 s1, s1, 0x7fffffff 1136; GFX8-NEXT: s_min_i32 s2, s0, -1 1137; GFX8-NEXT: s_sub_i32 s2, s2, 0x80000000 1138; GFX8-NEXT: v_max_i32_e32 v0, s1, v0 1139; GFX8-NEXT: v_min_i32_e32 v0, s2, v0 1140; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s0, v0 1141; GFX8-NEXT: ; return to shader part epilog 1142; 1143; GFX9-LABEL: ssubsat_i32_sv: 1144; GFX9: ; %bb.0: 1145; GFX9-NEXT: v_sub_i32 v0, s0, v0 clamp 1146; GFX9-NEXT: ; return to shader part epilog 1147; 1148; GFX10-LABEL: ssubsat_i32_sv: 1149; GFX10: ; %bb.0: 1150; GFX10-NEXT: v_sub_nc_i32 v0, s0, v0 clamp 1151; GFX10-NEXT: ; return to shader part epilog 1152 %result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs) 1153 %cast = bitcast i32 %result to float 1154 ret float %cast 1155} 1156 1157define amdgpu_ps float @ssubsat_i32_vs(i32 %lhs, i32 inreg %rhs) { 1158; GFX6-LABEL: ssubsat_i32_vs: 1159; GFX6: ; %bb.0: 1160; GFX6-NEXT: v_max_i32_e32 v1, -1, v0 1161; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 0x7fffffff, v1 1162; GFX6-NEXT: v_min_i32_e32 v2, -1, v0 1163; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x80000000, v2 1164; GFX6-NEXT: v_max_i32_e32 v1, s0, v1 1165; GFX6-NEXT: v_min_i32_e32 v1, v1, v2 1166; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 1167; GFX6-NEXT: ; return to shader part epilog 1168; 1169; GFX8-LABEL: 
ssubsat_i32_vs: 1170; GFX8: ; %bb.0: 1171; GFX8-NEXT: v_max_i32_e32 v1, -1, v0 1172; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 0x7fffffff, v1 1173; GFX8-NEXT: v_min_i32_e32 v2, -1, v0 1174; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 0x80000000, v2 1175; GFX8-NEXT: v_max_i32_e32 v1, s0, v1 1176; GFX8-NEXT: v_min_i32_e32 v1, v1, v2 1177; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v1 1178; GFX8-NEXT: ; return to shader part epilog 1179; 1180; GFX9-LABEL: ssubsat_i32_vs: 1181; GFX9: ; %bb.0: 1182; GFX9-NEXT: v_sub_i32 v0, v0, s0 clamp 1183; GFX9-NEXT: ; return to shader part epilog 1184; 1185; GFX10-LABEL: ssubsat_i32_vs: 1186; GFX10: ; %bb.0: 1187; GFX10-NEXT: v_sub_nc_i32 v0, v0, s0 clamp 1188; GFX10-NEXT: ; return to shader part epilog 1189 %result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs) 1190 %cast = bitcast i32 %result to float 1191 ret float %cast 1192} 1193 1194define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { 1195; GFX6-LABEL: v_ssubsat_v2i32: 1196; GFX6: ; %bb.0: 1197; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1198; GFX6-NEXT: s_brev_b32 s4, -2 1199; GFX6-NEXT: v_max_i32_e32 v4, -1, v0 1200; GFX6-NEXT: s_brev_b32 s5, 1 1201; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 1202; GFX6-NEXT: v_min_i32_e32 v5, -1, v0 1203; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v5 1204; GFX6-NEXT: v_max_i32_e32 v2, v4, v2 1205; GFX6-NEXT: v_min_i32_e32 v2, v2, v5 1206; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 1207; GFX6-NEXT: v_max_i32_e32 v2, -1, v1 1208; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s4, v2 1209; GFX6-NEXT: v_min_i32_e32 v4, -1, v1 1210; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s5, v4 1211; GFX6-NEXT: v_max_i32_e32 v2, v2, v3 1212; GFX6-NEXT: v_min_i32_e32 v2, v2, v4 1213; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 1214; GFX6-NEXT: s_setpc_b64 s[30:31] 1215; 1216; GFX8-LABEL: v_ssubsat_v2i32: 1217; GFX8: ; %bb.0: 1218; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1219; GFX8-NEXT: s_brev_b32 s4, -2 1220; GFX8-NEXT: v_max_i32_e32 v4, -1, v0 1221; 
GFX8-NEXT: s_brev_b32 s5, 1 1222; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s4, v4 1223; GFX8-NEXT: v_min_i32_e32 v5, -1, v0 1224; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s5, v5 1225; GFX8-NEXT: v_max_i32_e32 v2, v4, v2 1226; GFX8-NEXT: v_min_i32_e32 v2, v2, v5 1227; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2 1228; GFX8-NEXT: v_max_i32_e32 v2, -1, v1 1229; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s4, v2 1230; GFX8-NEXT: v_min_i32_e32 v4, -1, v1 1231; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s5, v4 1232; GFX8-NEXT: v_max_i32_e32 v2, v2, v3 1233; GFX8-NEXT: v_min_i32_e32 v2, v2, v4 1234; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v2 1235; GFX8-NEXT: s_setpc_b64 s[30:31] 1236; 1237; GFX9-LABEL: v_ssubsat_v2i32: 1238; GFX9: ; %bb.0: 1239; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1240; GFX9-NEXT: v_sub_i32 v0, v0, v2 clamp 1241; GFX9-NEXT: v_sub_i32 v1, v1, v3 clamp 1242; GFX9-NEXT: s_setpc_b64 s[30:31] 1243; 1244; GFX10-LABEL: v_ssubsat_v2i32: 1245; GFX10: ; %bb.0: 1246; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1247; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1248; GFX10-NEXT: v_sub_nc_i32 v0, v0, v2 clamp 1249; GFX10-NEXT: v_sub_nc_i32 v1, v1, v3 clamp 1250; GFX10-NEXT: s_setpc_b64 s[30:31] 1251 %result = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) 1252 ret <2 x i32> %result 1253} 1254 1255define amdgpu_ps <2 x i32> @s_ssubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inreg %rhs) { 1256; GFX6-LABEL: s_ssubsat_v2i32: 1257; GFX6: ; %bb.0: 1258; GFX6-NEXT: s_brev_b32 s4, -2 1259; GFX6-NEXT: s_max_i32 s6, s0, -1 1260; GFX6-NEXT: s_brev_b32 s5, 1 1261; GFX6-NEXT: s_sub_i32 s6, s6, s4 1262; GFX6-NEXT: s_min_i32 s7, s0, -1 1263; GFX6-NEXT: s_sub_i32 s7, s7, s5 1264; GFX6-NEXT: s_max_i32 s2, s6, s2 1265; GFX6-NEXT: s_min_i32 s2, s2, s7 1266; GFX6-NEXT: s_sub_i32 s0, s0, s2 1267; GFX6-NEXT: s_max_i32 s2, s1, -1 1268; GFX6-NEXT: s_sub_i32 s2, s2, s4 1269; GFX6-NEXT: s_min_i32 s4, s1, -1 1270; GFX6-NEXT: s_sub_i32 s4, s4, s5 1271; GFX6-NEXT: s_max_i32 s2, s2, s3 
1272; GFX6-NEXT: s_min_i32 s2, s2, s4 1273; GFX6-NEXT: s_sub_i32 s1, s1, s2 1274; GFX6-NEXT: ; return to shader part epilog 1275; 1276; GFX8-LABEL: s_ssubsat_v2i32: 1277; GFX8: ; %bb.0: 1278; GFX8-NEXT: s_brev_b32 s4, -2 1279; GFX8-NEXT: s_max_i32 s6, s0, -1 1280; GFX8-NEXT: s_brev_b32 s5, 1 1281; GFX8-NEXT: s_sub_i32 s6, s6, s4 1282; GFX8-NEXT: s_min_i32 s7, s0, -1 1283; GFX8-NEXT: s_sub_i32 s7, s7, s5 1284; GFX8-NEXT: s_max_i32 s2, s6, s2 1285; GFX8-NEXT: s_min_i32 s2, s2, s7 1286; GFX8-NEXT: s_sub_i32 s0, s0, s2 1287; GFX8-NEXT: s_max_i32 s2, s1, -1 1288; GFX8-NEXT: s_sub_i32 s2, s2, s4 1289; GFX8-NEXT: s_min_i32 s4, s1, -1 1290; GFX8-NEXT: s_sub_i32 s4, s4, s5 1291; GFX8-NEXT: s_max_i32 s2, s2, s3 1292; GFX8-NEXT: s_min_i32 s2, s2, s4 1293; GFX8-NEXT: s_sub_i32 s1, s1, s2 1294; GFX8-NEXT: ; return to shader part epilog 1295; 1296; GFX9-LABEL: s_ssubsat_v2i32: 1297; GFX9: ; %bb.0: 1298; GFX9-NEXT: v_mov_b32_e32 v0, s2 1299; GFX9-NEXT: v_mov_b32_e32 v1, s3 1300; GFX9-NEXT: v_sub_i32 v0, s0, v0 clamp 1301; GFX9-NEXT: v_sub_i32 v1, s1, v1 clamp 1302; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1303; GFX9-NEXT: v_readfirstlane_b32 s1, v1 1304; GFX9-NEXT: ; return to shader part epilog 1305; 1306; GFX10-LABEL: s_ssubsat_v2i32: 1307; GFX10: ; %bb.0: 1308; GFX10-NEXT: v_sub_nc_i32 v0, s0, s2 clamp 1309; GFX10-NEXT: v_sub_nc_i32 v1, s1, s3 clamp 1310; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1311; GFX10-NEXT: v_readfirstlane_b32 s1, v1 1312; GFX10-NEXT: ; return to shader part epilog 1313 %result = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) 1314 ret <2 x i32> %result 1315} 1316 1317define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) { 1318; GFX6-LABEL: v_ssubsat_v3i32: 1319; GFX6: ; %bb.0: 1320; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1321; GFX6-NEXT: s_brev_b32 s4, -2 1322; GFX6-NEXT: v_max_i32_e32 v6, -1, v0 1323; GFX6-NEXT: s_brev_b32 s5, 1 1324; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, s4, v6 1325; GFX6-NEXT: v_min_i32_e32 v7, 
-1, v0 1326; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, s5, v7 1327; GFX6-NEXT: v_max_i32_e32 v3, v6, v3 1328; GFX6-NEXT: v_min_i32_e32 v3, v3, v7 1329; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 1330; GFX6-NEXT: v_max_i32_e32 v3, -1, v1 1331; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s4, v3 1332; GFX6-NEXT: v_min_i32_e32 v6, -1, v1 1333; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, s5, v6 1334; GFX6-NEXT: v_max_i32_e32 v3, v3, v4 1335; GFX6-NEXT: v_min_i32_e32 v3, v3, v6 1336; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 1337; GFX6-NEXT: v_max_i32_e32 v3, -1, v2 1338; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s4, v3 1339; GFX6-NEXT: v_min_i32_e32 v4, -1, v2 1340; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s5, v4 1341; GFX6-NEXT: v_max_i32_e32 v3, v3, v5 1342; GFX6-NEXT: v_min_i32_e32 v3, v3, v4 1343; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 1344; GFX6-NEXT: s_setpc_b64 s[30:31] 1345; 1346; GFX8-LABEL: v_ssubsat_v3i32: 1347; GFX8: ; %bb.0: 1348; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1349; GFX8-NEXT: s_brev_b32 s4, -2 1350; GFX8-NEXT: v_max_i32_e32 v6, -1, v0 1351; GFX8-NEXT: s_brev_b32 s5, 1 1352; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s4, v6 1353; GFX8-NEXT: v_min_i32_e32 v7, -1, v0 1354; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, s5, v7 1355; GFX8-NEXT: v_max_i32_e32 v3, v6, v3 1356; GFX8-NEXT: v_min_i32_e32 v3, v3, v7 1357; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v3 1358; GFX8-NEXT: v_max_i32_e32 v3, -1, v1 1359; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s4, v3 1360; GFX8-NEXT: v_min_i32_e32 v6, -1, v1 1361; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s5, v6 1362; GFX8-NEXT: v_max_i32_e32 v3, v3, v4 1363; GFX8-NEXT: v_min_i32_e32 v3, v3, v6 1364; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v3 1365; GFX8-NEXT: v_max_i32_e32 v3, -1, v2 1366; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s4, v3 1367; GFX8-NEXT: v_min_i32_e32 v4, -1, v2 1368; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s5, v4 1369; GFX8-NEXT: v_max_i32_e32 v3, v3, v5 1370; GFX8-NEXT: v_min_i32_e32 v3, v3, v4 1371; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 1372; 
GFX8-NEXT: s_setpc_b64 s[30:31] 1373; 1374; GFX9-LABEL: v_ssubsat_v3i32: 1375; GFX9: ; %bb.0: 1376; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1377; GFX9-NEXT: v_sub_i32 v0, v0, v3 clamp 1378; GFX9-NEXT: v_sub_i32 v1, v1, v4 clamp 1379; GFX9-NEXT: v_sub_i32 v2, v2, v5 clamp 1380; GFX9-NEXT: s_setpc_b64 s[30:31] 1381; 1382; GFX10-LABEL: v_ssubsat_v3i32: 1383; GFX10: ; %bb.0: 1384; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1385; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1386; GFX10-NEXT: v_sub_nc_i32 v0, v0, v3 clamp 1387; GFX10-NEXT: v_sub_nc_i32 v1, v1, v4 clamp 1388; GFX10-NEXT: v_sub_nc_i32 v2, v2, v5 clamp 1389; GFX10-NEXT: s_setpc_b64 s[30:31] 1390 %result = call <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs) 1391 ret <3 x i32> %result 1392} 1393 1394define amdgpu_ps <3 x i32> @s_ssubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inreg %rhs) { 1395; GFX6-LABEL: s_ssubsat_v3i32: 1396; GFX6: ; %bb.0: 1397; GFX6-NEXT: s_brev_b32 s6, -2 1398; GFX6-NEXT: s_max_i32 s8, s0, -1 1399; GFX6-NEXT: s_brev_b32 s7, 1 1400; GFX6-NEXT: s_sub_i32 s8, s8, s6 1401; GFX6-NEXT: s_min_i32 s9, s0, -1 1402; GFX6-NEXT: s_sub_i32 s9, s9, s7 1403; GFX6-NEXT: s_max_i32 s3, s8, s3 1404; GFX6-NEXT: s_min_i32 s3, s3, s9 1405; GFX6-NEXT: s_sub_i32 s0, s0, s3 1406; GFX6-NEXT: s_max_i32 s3, s1, -1 1407; GFX6-NEXT: s_sub_i32 s3, s3, s6 1408; GFX6-NEXT: s_min_i32 s8, s1, -1 1409; GFX6-NEXT: s_sub_i32 s8, s8, s7 1410; GFX6-NEXT: s_max_i32 s3, s3, s4 1411; GFX6-NEXT: s_min_i32 s3, s3, s8 1412; GFX6-NEXT: s_sub_i32 s1, s1, s3 1413; GFX6-NEXT: s_max_i32 s3, s2, -1 1414; GFX6-NEXT: s_sub_i32 s3, s3, s6 1415; GFX6-NEXT: s_min_i32 s4, s2, -1 1416; GFX6-NEXT: s_sub_i32 s4, s4, s7 1417; GFX6-NEXT: s_max_i32 s3, s3, s5 1418; GFX6-NEXT: s_min_i32 s3, s3, s4 1419; GFX6-NEXT: s_sub_i32 s2, s2, s3 1420; GFX6-NEXT: ; return to shader part epilog 1421; 1422; GFX8-LABEL: s_ssubsat_v3i32: 1423; GFX8: ; %bb.0: 1424; GFX8-NEXT: s_brev_b32 s6, -2 1425; GFX8-NEXT: s_max_i32 s8, s0, -1 1426; 
GFX8-NEXT: s_brev_b32 s7, 1 1427; GFX8-NEXT: s_sub_i32 s8, s8, s6 1428; GFX8-NEXT: s_min_i32 s9, s0, -1 1429; GFX8-NEXT: s_sub_i32 s9, s9, s7 1430; GFX8-NEXT: s_max_i32 s3, s8, s3 1431; GFX8-NEXT: s_min_i32 s3, s3, s9 1432; GFX8-NEXT: s_sub_i32 s0, s0, s3 1433; GFX8-NEXT: s_max_i32 s3, s1, -1 1434; GFX8-NEXT: s_sub_i32 s3, s3, s6 1435; GFX8-NEXT: s_min_i32 s8, s1, -1 1436; GFX8-NEXT: s_sub_i32 s8, s8, s7 1437; GFX8-NEXT: s_max_i32 s3, s3, s4 1438; GFX8-NEXT: s_min_i32 s3, s3, s8 1439; GFX8-NEXT: s_sub_i32 s1, s1, s3 1440; GFX8-NEXT: s_max_i32 s3, s2, -1 1441; GFX8-NEXT: s_sub_i32 s3, s3, s6 1442; GFX8-NEXT: s_min_i32 s4, s2, -1 1443; GFX8-NEXT: s_sub_i32 s4, s4, s7 1444; GFX8-NEXT: s_max_i32 s3, s3, s5 1445; GFX8-NEXT: s_min_i32 s3, s3, s4 1446; GFX8-NEXT: s_sub_i32 s2, s2, s3 1447; GFX8-NEXT: ; return to shader part epilog 1448; 1449; GFX9-LABEL: s_ssubsat_v3i32: 1450; GFX9: ; %bb.0: 1451; GFX9-NEXT: v_mov_b32_e32 v0, s3 1452; GFX9-NEXT: v_mov_b32_e32 v1, s4 1453; GFX9-NEXT: v_mov_b32_e32 v2, s5 1454; GFX9-NEXT: v_sub_i32 v0, s0, v0 clamp 1455; GFX9-NEXT: v_sub_i32 v1, s1, v1 clamp 1456; GFX9-NEXT: v_sub_i32 v2, s2, v2 clamp 1457; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1458; GFX9-NEXT: v_readfirstlane_b32 s1, v1 1459; GFX9-NEXT: v_readfirstlane_b32 s2, v2 1460; GFX9-NEXT: ; return to shader part epilog 1461; 1462; GFX10-LABEL: s_ssubsat_v3i32: 1463; GFX10: ; %bb.0: 1464; GFX10-NEXT: v_sub_nc_i32 v0, s0, s3 clamp 1465; GFX10-NEXT: v_sub_nc_i32 v1, s1, s4 clamp 1466; GFX10-NEXT: v_sub_nc_i32 v2, s2, s5 clamp 1467; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1468; GFX10-NEXT: v_readfirstlane_b32 s1, v1 1469; GFX10-NEXT: v_readfirstlane_b32 s2, v2 1470; GFX10-NEXT: ; return to shader part epilog 1471 %result = call <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs) 1472 ret <3 x i32> %result 1473} 1474 1475define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { 1476; GFX6-LABEL: v_ssubsat_v4i32: 1477; GFX6: ; %bb.0: 1478; GFX6-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 1479; GFX6-NEXT: s_brev_b32 s4, -2 1480; GFX6-NEXT: v_max_i32_e32 v8, -1, v0 1481; GFX6-NEXT: s_brev_b32 s5, 1 1482; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s4, v8 1483; GFX6-NEXT: v_min_i32_e32 v9, -1, v0 1484; GFX6-NEXT: v_subrev_i32_e32 v9, vcc, s5, v9 1485; GFX6-NEXT: v_max_i32_e32 v4, v8, v4 1486; GFX6-NEXT: v_min_i32_e32 v4, v4, v9 1487; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 1488; GFX6-NEXT: v_max_i32_e32 v4, -1, v1 1489; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 1490; GFX6-NEXT: v_min_i32_e32 v8, -1, v1 1491; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s5, v8 1492; GFX6-NEXT: v_max_i32_e32 v4, v4, v5 1493; GFX6-NEXT: v_min_i32_e32 v4, v4, v8 1494; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 1495; GFX6-NEXT: v_max_i32_e32 v4, -1, v2 1496; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 1497; GFX6-NEXT: v_min_i32_e32 v5, -1, v2 1498; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v5 1499; GFX6-NEXT: v_max_i32_e32 v4, v4, v6 1500; GFX6-NEXT: v_min_i32_e32 v4, v4, v5 1501; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 1502; GFX6-NEXT: v_max_i32_e32 v4, -1, v3 1503; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 0x7fffffff, v4 1504; GFX6-NEXT: v_min_i32_e32 v5, -1, v3 1505; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 0x80000000, v5 1506; GFX6-NEXT: v_max_i32_e32 v4, v4, v7 1507; GFX6-NEXT: v_min_i32_e32 v4, v4, v5 1508; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v4 1509; GFX6-NEXT: s_setpc_b64 s[30:31] 1510; 1511; GFX8-LABEL: v_ssubsat_v4i32: 1512; GFX8: ; %bb.0: 1513; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1514; GFX8-NEXT: s_brev_b32 s4, -2 1515; GFX8-NEXT: v_max_i32_e32 v8, -1, v0 1516; GFX8-NEXT: s_brev_b32 s5, 1 1517; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, s4, v8 1518; GFX8-NEXT: v_min_i32_e32 v9, -1, v0 1519; GFX8-NEXT: v_subrev_u32_e32 v9, vcc, s5, v9 1520; GFX8-NEXT: v_max_i32_e32 v4, v8, v4 1521; GFX8-NEXT: v_min_i32_e32 v4, v4, v9 1522; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v4 1523; GFX8-NEXT: v_max_i32_e32 v4, -1, v1 1524; GFX8-NEXT: v_subrev_u32_e32 v4, 
vcc, s4, v4 1525; GFX8-NEXT: v_min_i32_e32 v8, -1, v1 1526; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, s5, v8 1527; GFX8-NEXT: v_max_i32_e32 v4, v4, v5 1528; GFX8-NEXT: v_min_i32_e32 v4, v4, v8 1529; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v4 1530; GFX8-NEXT: v_max_i32_e32 v4, -1, v2 1531; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s4, v4 1532; GFX8-NEXT: v_min_i32_e32 v5, -1, v2 1533; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s5, v5 1534; GFX8-NEXT: v_max_i32_e32 v4, v4, v6 1535; GFX8-NEXT: v_min_i32_e32 v4, v4, v5 1536; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v4 1537; GFX8-NEXT: v_max_i32_e32 v4, -1, v3 1538; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 0x7fffffff, v4 1539; GFX8-NEXT: v_min_i32_e32 v5, -1, v3 1540; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, 0x80000000, v5 1541; GFX8-NEXT: v_max_i32_e32 v4, v4, v7 1542; GFX8-NEXT: v_min_i32_e32 v4, v4, v5 1543; GFX8-NEXT: v_sub_u32_e32 v3, vcc, v3, v4 1544; GFX8-NEXT: s_setpc_b64 s[30:31] 1545; 1546; GFX9-LABEL: v_ssubsat_v4i32: 1547; GFX9: ; %bb.0: 1548; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1549; GFX9-NEXT: v_sub_i32 v0, v0, v4 clamp 1550; GFX9-NEXT: v_sub_i32 v1, v1, v5 clamp 1551; GFX9-NEXT: v_sub_i32 v2, v2, v6 clamp 1552; GFX9-NEXT: v_sub_i32 v3, v3, v7 clamp 1553; GFX9-NEXT: s_setpc_b64 s[30:31] 1554; 1555; GFX10-LABEL: v_ssubsat_v4i32: 1556; GFX10: ; %bb.0: 1557; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1558; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1559; GFX10-NEXT: v_sub_nc_i32 v0, v0, v4 clamp 1560; GFX10-NEXT: v_sub_nc_i32 v1, v1, v5 clamp 1561; GFX10-NEXT: v_sub_nc_i32 v2, v2, v6 clamp 1562; GFX10-NEXT: v_sub_nc_i32 v3, v3, v7 clamp 1563; GFX10-NEXT: s_setpc_b64 s[30:31] 1564 %result = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) 1565 ret <4 x i32> %result 1566} 1567 1568define amdgpu_ps <4 x i32> @s_ssubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inreg %rhs) { 1569; GFX6-LABEL: s_ssubsat_v4i32: 1570; GFX6: ; %bb.0: 1571; GFX6-NEXT: s_brev_b32 s8, -2 1572; GFX6-NEXT: s_max_i32 s10, s0, -1 1573; 
GFX6-NEXT: s_brev_b32 s9, 1 1574; GFX6-NEXT: s_sub_i32 s10, s10, s8 1575; GFX6-NEXT: s_min_i32 s11, s0, -1 1576; GFX6-NEXT: s_sub_i32 s11, s11, s9 1577; GFX6-NEXT: s_max_i32 s4, s10, s4 1578; GFX6-NEXT: s_min_i32 s4, s4, s11 1579; GFX6-NEXT: s_sub_i32 s0, s0, s4 1580; GFX6-NEXT: s_max_i32 s4, s1, -1 1581; GFX6-NEXT: s_sub_i32 s4, s4, s8 1582; GFX6-NEXT: s_min_i32 s10, s1, -1 1583; GFX6-NEXT: s_sub_i32 s10, s10, s9 1584; GFX6-NEXT: s_max_i32 s4, s4, s5 1585; GFX6-NEXT: s_min_i32 s4, s4, s10 1586; GFX6-NEXT: s_sub_i32 s1, s1, s4 1587; GFX6-NEXT: s_max_i32 s4, s2, -1 1588; GFX6-NEXT: s_sub_i32 s4, s4, s8 1589; GFX6-NEXT: s_min_i32 s5, s2, -1 1590; GFX6-NEXT: s_sub_i32 s5, s5, s9 1591; GFX6-NEXT: s_max_i32 s4, s4, s6 1592; GFX6-NEXT: s_min_i32 s4, s4, s5 1593; GFX6-NEXT: s_sub_i32 s2, s2, s4 1594; GFX6-NEXT: s_max_i32 s4, s3, -1 1595; GFX6-NEXT: s_sub_i32 s4, s4, s8 1596; GFX6-NEXT: s_min_i32 s5, s3, -1 1597; GFX6-NEXT: s_sub_i32 s5, s5, s9 1598; GFX6-NEXT: s_max_i32 s4, s4, s7 1599; GFX6-NEXT: s_min_i32 s4, s4, s5 1600; GFX6-NEXT: s_sub_i32 s3, s3, s4 1601; GFX6-NEXT: ; return to shader part epilog 1602; 1603; GFX8-LABEL: s_ssubsat_v4i32: 1604; GFX8: ; %bb.0: 1605; GFX8-NEXT: s_brev_b32 s8, -2 1606; GFX8-NEXT: s_max_i32 s10, s0, -1 1607; GFX8-NEXT: s_brev_b32 s9, 1 1608; GFX8-NEXT: s_sub_i32 s10, s10, s8 1609; GFX8-NEXT: s_min_i32 s11, s0, -1 1610; GFX8-NEXT: s_sub_i32 s11, s11, s9 1611; GFX8-NEXT: s_max_i32 s4, s10, s4 1612; GFX8-NEXT: s_min_i32 s4, s4, s11 1613; GFX8-NEXT: s_sub_i32 s0, s0, s4 1614; GFX8-NEXT: s_max_i32 s4, s1, -1 1615; GFX8-NEXT: s_sub_i32 s4, s4, s8 1616; GFX8-NEXT: s_min_i32 s10, s1, -1 1617; GFX8-NEXT: s_sub_i32 s10, s10, s9 1618; GFX8-NEXT: s_max_i32 s4, s4, s5 1619; GFX8-NEXT: s_min_i32 s4, s4, s10 1620; GFX8-NEXT: s_sub_i32 s1, s1, s4 1621; GFX8-NEXT: s_max_i32 s4, s2, -1 1622; GFX8-NEXT: s_sub_i32 s4, s4, s8 1623; GFX8-NEXT: s_min_i32 s5, s2, -1 1624; GFX8-NEXT: s_sub_i32 s5, s5, s9 1625; GFX8-NEXT: s_max_i32 s4, s4, s6 1626; GFX8-NEXT: 
s_min_i32 s4, s4, s5 1627; GFX8-NEXT: s_sub_i32 s2, s2, s4 1628; GFX8-NEXT: s_max_i32 s4, s3, -1 1629; GFX8-NEXT: s_sub_i32 s4, s4, s8 1630; GFX8-NEXT: s_min_i32 s5, s3, -1 1631; GFX8-NEXT: s_sub_i32 s5, s5, s9 1632; GFX8-NEXT: s_max_i32 s4, s4, s7 1633; GFX8-NEXT: s_min_i32 s4, s4, s5 1634; GFX8-NEXT: s_sub_i32 s3, s3, s4 1635; GFX8-NEXT: ; return to shader part epilog 1636; 1637; GFX9-LABEL: s_ssubsat_v4i32: 1638; GFX9: ; %bb.0: 1639; GFX9-NEXT: v_mov_b32_e32 v0, s4 1640; GFX9-NEXT: v_mov_b32_e32 v1, s5 1641; GFX9-NEXT: v_mov_b32_e32 v2, s6 1642; GFX9-NEXT: v_mov_b32_e32 v3, s7 1643; GFX9-NEXT: v_sub_i32 v0, s0, v0 clamp 1644; GFX9-NEXT: v_sub_i32 v1, s1, v1 clamp 1645; GFX9-NEXT: v_sub_i32 v2, s2, v2 clamp 1646; GFX9-NEXT: v_sub_i32 v3, s3, v3 clamp 1647; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1648; GFX9-NEXT: v_readfirstlane_b32 s1, v1 1649; GFX9-NEXT: v_readfirstlane_b32 s2, v2 1650; GFX9-NEXT: v_readfirstlane_b32 s3, v3 1651; GFX9-NEXT: ; return to shader part epilog 1652; 1653; GFX10-LABEL: s_ssubsat_v4i32: 1654; GFX10: ; %bb.0: 1655; GFX10-NEXT: v_sub_nc_i32 v0, s0, s4 clamp 1656; GFX10-NEXT: v_sub_nc_i32 v1, s1, s5 clamp 1657; GFX10-NEXT: v_sub_nc_i32 v2, s2, s6 clamp 1658; GFX10-NEXT: v_sub_nc_i32 v3, s3, s7 clamp 1659; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1660; GFX10-NEXT: v_readfirstlane_b32 s1, v1 1661; GFX10-NEXT: v_readfirstlane_b32 s2, v2 1662; GFX10-NEXT: v_readfirstlane_b32 s3, v3 1663; GFX10-NEXT: ; return to shader part epilog 1664 %result = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) 1665 ret <4 x i32> %result 1666} 1667 1668define <5 x i32> @v_ssubsat_v5i32(<5 x i32> %lhs, <5 x i32> %rhs) { 1669; GFX6-LABEL: v_ssubsat_v5i32: 1670; GFX6: ; %bb.0: 1671; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1672; GFX6-NEXT: s_brev_b32 s4, -2 1673; GFX6-NEXT: v_max_i32_e32 v10, -1, v0 1674; GFX6-NEXT: s_brev_b32 s5, 1 1675; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, s4, v10 1676; GFX6-NEXT: v_min_i32_e32 v12, -1, v0 1677; 
GFX6-NEXT: v_subrev_i32_e32 v12, vcc, s5, v12 1678; GFX6-NEXT: v_max_i32_e32 v5, v10, v5 1679; GFX6-NEXT: v_min_i32_e32 v5, v5, v12 1680; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 1681; GFX6-NEXT: v_max_i32_e32 v5, -1, v1 1682; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s4, v5 1683; GFX6-NEXT: v_min_i32_e32 v10, -1, v1 1684; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, s5, v10 1685; GFX6-NEXT: v_max_i32_e32 v5, v5, v6 1686; GFX6-NEXT: v_min_i32_e32 v5, v5, v10 1687; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 1688; GFX6-NEXT: v_max_i32_e32 v5, -1, v2 1689; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s4, v5 1690; GFX6-NEXT: v_min_i32_e32 v6, -1, v2 1691; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, s5, v6 1692; GFX6-NEXT: v_max_i32_e32 v5, v5, v7 1693; GFX6-NEXT: v_min_i32_e32 v5, v5, v6 1694; GFX6-NEXT: v_bfrev_b32_e32 v11, -2 1695; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 1696; GFX6-NEXT: v_max_i32_e32 v5, -1, v3 1697; GFX6-NEXT: v_bfrev_b32_e32 v13, 1 1698; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v11 1699; GFX6-NEXT: v_min_i32_e32 v6, -1, v3 1700; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v13 1701; GFX6-NEXT: v_max_i32_e32 v5, v5, v8 1702; GFX6-NEXT: v_min_i32_e32 v5, v5, v6 1703; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v5 1704; GFX6-NEXT: v_max_i32_e32 v5, -1, v4 1705; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v11 1706; GFX6-NEXT: v_min_i32_e32 v6, -1, v4 1707; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v13 1708; GFX6-NEXT: v_max_i32_e32 v5, v5, v9 1709; GFX6-NEXT: v_min_i32_e32 v5, v5, v6 1710; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v5 1711; GFX6-NEXT: s_setpc_b64 s[30:31] 1712; 1713; GFX8-LABEL: v_ssubsat_v5i32: 1714; GFX8: ; %bb.0: 1715; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1716; GFX8-NEXT: s_brev_b32 s4, -2 1717; GFX8-NEXT: v_max_i32_e32 v10, -1, v0 1718; GFX8-NEXT: s_brev_b32 s5, 1 1719; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, s4, v10 1720; GFX8-NEXT: v_min_i32_e32 v12, -1, v0 1721; GFX8-NEXT: v_subrev_u32_e32 v12, vcc, s5, v12 1722; GFX8-NEXT: v_max_i32_e32 v5, v10, v5 1723; GFX8-NEXT: 
v_min_i32_e32 v5, v5, v12 1724; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v5 1725; GFX8-NEXT: v_max_i32_e32 v5, -1, v1 1726; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s4, v5 1727; GFX8-NEXT: v_min_i32_e32 v10, -1, v1 1728; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, s5, v10 1729; GFX8-NEXT: v_max_i32_e32 v5, v5, v6 1730; GFX8-NEXT: v_min_i32_e32 v5, v5, v10 1731; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v5 1732; GFX8-NEXT: v_max_i32_e32 v5, -1, v2 1733; GFX8-NEXT: v_subrev_u32_e32 v5, vcc, s4, v5 1734; GFX8-NEXT: v_min_i32_e32 v6, -1, v2 1735; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, s5, v6 1736; GFX8-NEXT: v_max_i32_e32 v5, v5, v7 1737; GFX8-NEXT: v_min_i32_e32 v5, v5, v6 1738; GFX8-NEXT: v_bfrev_b32_e32 v11, -2 1739; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v5 1740; GFX8-NEXT: v_max_i32_e32 v5, -1, v3 1741; GFX8-NEXT: v_bfrev_b32_e32 v13, 1 1742; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v5, v11 1743; GFX8-NEXT: v_min_i32_e32 v6, -1, v3 1744; GFX8-NEXT: v_sub_u32_e32 v6, vcc, v6, v13 1745; GFX8-NEXT: v_max_i32_e32 v5, v5, v8 1746; GFX8-NEXT: v_min_i32_e32 v5, v5, v6 1747; GFX8-NEXT: v_sub_u32_e32 v3, vcc, v3, v5 1748; GFX8-NEXT: v_max_i32_e32 v5, -1, v4 1749; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v5, v11 1750; GFX8-NEXT: v_min_i32_e32 v6, -1, v4 1751; GFX8-NEXT: v_sub_u32_e32 v6, vcc, v6, v13 1752; GFX8-NEXT: v_max_i32_e32 v5, v5, v9 1753; GFX8-NEXT: v_min_i32_e32 v5, v5, v6 1754; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v5 1755; GFX8-NEXT: s_setpc_b64 s[30:31] 1756; 1757; GFX9-LABEL: v_ssubsat_v5i32: 1758; GFX9: ; %bb.0: 1759; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1760; GFX9-NEXT: v_sub_i32 v0, v0, v5 clamp 1761; GFX9-NEXT: v_sub_i32 v1, v1, v6 clamp 1762; GFX9-NEXT: v_sub_i32 v2, v2, v7 clamp 1763; GFX9-NEXT: v_sub_i32 v3, v3, v8 clamp 1764; GFX9-NEXT: v_sub_i32 v4, v4, v9 clamp 1765; GFX9-NEXT: s_setpc_b64 s[30:31] 1766; 1767; GFX10-LABEL: v_ssubsat_v5i32: 1768; GFX10: ; %bb.0: 1769; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1770; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1771; 
GFX10-NEXT: v_sub_nc_i32 v0, v0, v5 clamp 1772; GFX10-NEXT: v_sub_nc_i32 v1, v1, v6 clamp 1773; GFX10-NEXT: v_sub_nc_i32 v2, v2, v7 clamp 1774; GFX10-NEXT: v_sub_nc_i32 v3, v3, v8 clamp 1775; GFX10-NEXT: v_sub_nc_i32 v4, v4, v9 clamp 1776; GFX10-NEXT: s_setpc_b64 s[30:31] 1777 %result = call <5 x i32> @llvm.ssub.sat.v5i32(<5 x i32> %lhs, <5 x i32> %rhs) 1778 ret <5 x i32> %result 1779} 1780 1781define amdgpu_ps <5 x i32> @s_ssubsat_v5i32(<5 x i32> inreg %lhs, <5 x i32> inreg %rhs) { 1782; GFX6-LABEL: s_ssubsat_v5i32: 1783; GFX6: ; %bb.0: 1784; GFX6-NEXT: s_brev_b32 s10, -2 1785; GFX6-NEXT: s_max_i32 s12, s0, -1 1786; GFX6-NEXT: s_brev_b32 s11, 1 1787; GFX6-NEXT: s_sub_i32 s12, s12, s10 1788; GFX6-NEXT: s_min_i32 s13, s0, -1 1789; GFX6-NEXT: s_sub_i32 s13, s13, s11 1790; GFX6-NEXT: s_max_i32 s5, s12, s5 1791; GFX6-NEXT: s_min_i32 s5, s5, s13 1792; GFX6-NEXT: s_sub_i32 s0, s0, s5 1793; GFX6-NEXT: s_max_i32 s5, s1, -1 1794; GFX6-NEXT: s_sub_i32 s5, s5, s10 1795; GFX6-NEXT: s_min_i32 s12, s1, -1 1796; GFX6-NEXT: s_sub_i32 s12, s12, s11 1797; GFX6-NEXT: s_max_i32 s5, s5, s6 1798; GFX6-NEXT: s_min_i32 s5, s5, s12 1799; GFX6-NEXT: s_sub_i32 s1, s1, s5 1800; GFX6-NEXT: s_max_i32 s5, s2, -1 1801; GFX6-NEXT: s_sub_i32 s5, s5, s10 1802; GFX6-NEXT: s_min_i32 s6, s2, -1 1803; GFX6-NEXT: s_sub_i32 s6, s6, s11 1804; GFX6-NEXT: s_max_i32 s5, s5, s7 1805; GFX6-NEXT: s_min_i32 s5, s5, s6 1806; GFX6-NEXT: s_sub_i32 s2, s2, s5 1807; GFX6-NEXT: s_max_i32 s5, s3, -1 1808; GFX6-NEXT: s_sub_i32 s5, s5, s10 1809; GFX6-NEXT: s_min_i32 s6, s3, -1 1810; GFX6-NEXT: s_sub_i32 s6, s6, s11 1811; GFX6-NEXT: s_max_i32 s5, s5, s8 1812; GFX6-NEXT: s_min_i32 s5, s5, s6 1813; GFX6-NEXT: s_sub_i32 s3, s3, s5 1814; GFX6-NEXT: s_max_i32 s5, s4, -1 1815; GFX6-NEXT: s_sub_i32 s5, s5, s10 1816; GFX6-NEXT: s_min_i32 s6, s4, -1 1817; GFX6-NEXT: s_sub_i32 s6, s6, s11 1818; GFX6-NEXT: s_max_i32 s5, s5, s9 1819; GFX6-NEXT: s_min_i32 s5, s5, s6 1820; GFX6-NEXT: s_sub_i32 s4, s4, s5 1821; GFX6-NEXT: ; return to 
shader part epilog 1822; 1823; GFX8-LABEL: s_ssubsat_v5i32: 1824; GFX8: ; %bb.0: 1825; GFX8-NEXT: s_brev_b32 s10, -2 1826; GFX8-NEXT: s_max_i32 s12, s0, -1 1827; GFX8-NEXT: s_brev_b32 s11, 1 1828; GFX8-NEXT: s_sub_i32 s12, s12, s10 1829; GFX8-NEXT: s_min_i32 s13, s0, -1 1830; GFX8-NEXT: s_sub_i32 s13, s13, s11 1831; GFX8-NEXT: s_max_i32 s5, s12, s5 1832; GFX8-NEXT: s_min_i32 s5, s5, s13 1833; GFX8-NEXT: s_sub_i32 s0, s0, s5 1834; GFX8-NEXT: s_max_i32 s5, s1, -1 1835; GFX8-NEXT: s_sub_i32 s5, s5, s10 1836; GFX8-NEXT: s_min_i32 s12, s1, -1 1837; GFX8-NEXT: s_sub_i32 s12, s12, s11 1838; GFX8-NEXT: s_max_i32 s5, s5, s6 1839; GFX8-NEXT: s_min_i32 s5, s5, s12 1840; GFX8-NEXT: s_sub_i32 s1, s1, s5 1841; GFX8-NEXT: s_max_i32 s5, s2, -1 1842; GFX8-NEXT: s_sub_i32 s5, s5, s10 1843; GFX8-NEXT: s_min_i32 s6, s2, -1 1844; GFX8-NEXT: s_sub_i32 s6, s6, s11 1845; GFX8-NEXT: s_max_i32 s5, s5, s7 1846; GFX8-NEXT: s_min_i32 s5, s5, s6 1847; GFX8-NEXT: s_sub_i32 s2, s2, s5 1848; GFX8-NEXT: s_max_i32 s5, s3, -1 1849; GFX8-NEXT: s_sub_i32 s5, s5, s10 1850; GFX8-NEXT: s_min_i32 s6, s3, -1 1851; GFX8-NEXT: s_sub_i32 s6, s6, s11 1852; GFX8-NEXT: s_max_i32 s5, s5, s8 1853; GFX8-NEXT: s_min_i32 s5, s5, s6 1854; GFX8-NEXT: s_sub_i32 s3, s3, s5 1855; GFX8-NEXT: s_max_i32 s5, s4, -1 1856; GFX8-NEXT: s_sub_i32 s5, s5, s10 1857; GFX8-NEXT: s_min_i32 s6, s4, -1 1858; GFX8-NEXT: s_sub_i32 s6, s6, s11 1859; GFX8-NEXT: s_max_i32 s5, s5, s9 1860; GFX8-NEXT: s_min_i32 s5, s5, s6 1861; GFX8-NEXT: s_sub_i32 s4, s4, s5 1862; GFX8-NEXT: ; return to shader part epilog 1863; 1864; GFX9-LABEL: s_ssubsat_v5i32: 1865; GFX9: ; %bb.0: 1866; GFX9-NEXT: v_mov_b32_e32 v0, s5 1867; GFX9-NEXT: v_mov_b32_e32 v1, s6 1868; GFX9-NEXT: v_mov_b32_e32 v2, s7 1869; GFX9-NEXT: v_mov_b32_e32 v3, s8 1870; GFX9-NEXT: v_mov_b32_e32 v4, s9 1871; GFX9-NEXT: v_sub_i32 v0, s0, v0 clamp 1872; GFX9-NEXT: v_sub_i32 v1, s1, v1 clamp 1873; GFX9-NEXT: v_sub_i32 v2, s2, v2 clamp 1874; GFX9-NEXT: v_sub_i32 v3, s3, v3 clamp 1875; GFX9-NEXT: 
v_sub_i32 v4, s4, v4 clamp 1876; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1877; GFX9-NEXT: v_readfirstlane_b32 s1, v1 1878; GFX9-NEXT: v_readfirstlane_b32 s2, v2 1879; GFX9-NEXT: v_readfirstlane_b32 s3, v3 1880; GFX9-NEXT: v_readfirstlane_b32 s4, v4 1881; GFX9-NEXT: ; return to shader part epilog 1882; 1883; GFX10-LABEL: s_ssubsat_v5i32: 1884; GFX10: ; %bb.0: 1885; GFX10-NEXT: v_sub_nc_i32 v0, s0, s5 clamp 1886; GFX10-NEXT: v_sub_nc_i32 v1, s1, s6 clamp 1887; GFX10-NEXT: v_sub_nc_i32 v2, s2, s7 clamp 1888; GFX10-NEXT: v_sub_nc_i32 v3, s3, s8 clamp 1889; GFX10-NEXT: v_sub_nc_i32 v4, s4, s9 clamp 1890; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1891; GFX10-NEXT: v_readfirstlane_b32 s1, v1 1892; GFX10-NEXT: v_readfirstlane_b32 s2, v2 1893; GFX10-NEXT: v_readfirstlane_b32 s3, v3 1894; GFX10-NEXT: v_readfirstlane_b32 s4, v4 1895; GFX10-NEXT: ; return to shader part epilog 1896 %result = call <5 x i32> @llvm.ssub.sat.v5i32(<5 x i32> %lhs, <5 x i32> %rhs) 1897 ret <5 x i32> %result 1898} 1899 1900define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) { 1901; GFX6-LABEL: v_ssubsat_v16i32: 1902; GFX6: ; %bb.0: 1903; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1904; GFX6-NEXT: s_brev_b32 s4, -2 1905; GFX6-NEXT: v_max_i32_e32 v32, -1, v0 1906; GFX6-NEXT: v_subrev_i32_e32 v32, vcc, s4, v32 1907; GFX6-NEXT: v_max_i32_e32 v16, v32, v16 1908; GFX6-NEXT: s_brev_b32 s5, 1 1909; GFX6-NEXT: v_min_i32_e32 v32, -1, v0 1910; GFX6-NEXT: v_subrev_i32_e32 v32, vcc, s5, v32 1911; GFX6-NEXT: v_min_i32_e32 v16, v16, v32 1912; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v16 1913; GFX6-NEXT: v_max_i32_e32 v16, -1, v1 1914; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, s4, v16 1915; GFX6-NEXT: v_max_i32_e32 v16, v16, v17 1916; GFX6-NEXT: v_min_i32_e32 v17, -1, v1 1917; GFX6-NEXT: v_subrev_i32_e32 v17, vcc, s5, v17 1918; GFX6-NEXT: v_min_i32_e32 v16, v16, v17 1919; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v16 1920; GFX6-NEXT: v_max_i32_e32 v16, -1, v2 1921; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, s4, 
v16 1922; GFX6-NEXT: v_min_i32_e32 v17, -1, v2 1923; GFX6-NEXT: v_max_i32_e32 v16, v16, v18 1924; GFX6-NEXT: v_subrev_i32_e32 v17, vcc, s5, v17 1925; GFX6-NEXT: v_min_i32_e32 v16, v16, v17 1926; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v16 1927; GFX6-NEXT: v_bfrev_b32_e32 v16, -2 1928; GFX6-NEXT: v_max_i32_e32 v17, -1, v3 1929; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v16 1930; GFX6-NEXT: v_max_i32_e32 v17, v17, v19 1931; GFX6-NEXT: v_bfrev_b32_e32 v18, 1 1932; GFX6-NEXT: v_min_i32_e32 v19, -1, v3 1933; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v18 1934; GFX6-NEXT: v_min_i32_e32 v17, v17, v19 1935; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v17 1936; GFX6-NEXT: v_max_i32_e32 v17, -1, v4 1937; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v16 1938; GFX6-NEXT: v_min_i32_e32 v19, -1, v4 1939; GFX6-NEXT: v_max_i32_e32 v17, v17, v20 1940; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v18 1941; GFX6-NEXT: v_min_i32_e32 v17, v17, v19 1942; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v17 1943; GFX6-NEXT: v_max_i32_e32 v17, -1, v5 1944; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v16 1945; GFX6-NEXT: v_min_i32_e32 v19, -1, v5 1946; GFX6-NEXT: v_max_i32_e32 v17, v17, v21 1947; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v18 1948; GFX6-NEXT: v_min_i32_e32 v17, v17, v19 1949; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v17 1950; GFX6-NEXT: v_max_i32_e32 v17, -1, v6 1951; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v16 1952; GFX6-NEXT: v_min_i32_e32 v19, -1, v6 1953; GFX6-NEXT: v_max_i32_e32 v17, v17, v22 1954; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v18 1955; GFX6-NEXT: v_min_i32_e32 v17, v17, v19 1956; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v17 1957; GFX6-NEXT: v_max_i32_e32 v17, -1, v7 1958; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v16 1959; GFX6-NEXT: v_min_i32_e32 v19, -1, v7 1960; GFX6-NEXT: v_max_i32_e32 v17, v17, v23 1961; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v18 1962; GFX6-NEXT: v_min_i32_e32 v17, v17, v19 1963; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v17 1964; GFX6-NEXT: v_max_i32_e32 v17, -1, v8 1965; GFX6-NEXT: 
v_sub_i32_e32 v17, vcc, v17, v16 1966; GFX6-NEXT: v_min_i32_e32 v19, -1, v8 1967; GFX6-NEXT: v_max_i32_e32 v17, v17, v24 1968; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v18 1969; GFX6-NEXT: v_min_i32_e32 v17, v17, v19 1970; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v17 1971; GFX6-NEXT: v_max_i32_e32 v17, -1, v9 1972; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v16 1973; GFX6-NEXT: v_min_i32_e32 v19, -1, v9 1974; GFX6-NEXT: v_max_i32_e32 v17, v17, v25 1975; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v18 1976; GFX6-NEXT: v_min_i32_e32 v17, v17, v19 1977; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17 1978; GFX6-NEXT: v_max_i32_e32 v17, -1, v10 1979; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v16 1980; GFX6-NEXT: v_min_i32_e32 v19, -1, v10 1981; GFX6-NEXT: v_max_i32_e32 v17, v17, v26 1982; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v18 1983; GFX6-NEXT: v_min_i32_e32 v17, v17, v19 1984; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v17 1985; GFX6-NEXT: v_max_i32_e32 v17, -1, v11 1986; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v16 1987; GFX6-NEXT: v_min_i32_e32 v19, -1, v11 1988; GFX6-NEXT: v_max_i32_e32 v17, v17, v27 1989; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v18 1990; GFX6-NEXT: v_min_i32_e32 v17, v17, v19 1991; GFX6-NEXT: v_sub_i32_e32 v11, vcc, v11, v17 1992; GFX6-NEXT: v_max_i32_e32 v17, -1, v12 1993; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v16 1994; GFX6-NEXT: v_min_i32_e32 v19, -1, v12 1995; GFX6-NEXT: v_max_i32_e32 v17, v17, v28 1996; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v18 1997; GFX6-NEXT: v_min_i32_e32 v17, v17, v19 1998; GFX6-NEXT: v_sub_i32_e32 v12, vcc, v12, v17 1999; GFX6-NEXT: v_max_i32_e32 v17, -1, v13 2000; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v16 2001; GFX6-NEXT: v_min_i32_e32 v19, -1, v13 2002; GFX6-NEXT: v_max_i32_e32 v17, v17, v29 2003; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v18 2004; GFX6-NEXT: v_min_i32_e32 v17, v17, v19 2005; GFX6-NEXT: v_sub_i32_e32 v13, vcc, v13, v17 2006; GFX6-NEXT: v_max_i32_e32 v17, -1, v14 2007; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v16 2008; 
GFX6-NEXT: v_min_i32_e32 v19, -1, v14 2009; GFX6-NEXT: v_max_i32_e32 v17, v17, v30 2010; GFX6-NEXT: v_sub_i32_e32 v19, vcc, v19, v18 2011; GFX6-NEXT: v_min_i32_e32 v17, v17, v19 2012; GFX6-NEXT: v_sub_i32_e32 v14, vcc, v14, v17 2013; GFX6-NEXT: v_max_i32_e32 v17, -1, v15 2014; GFX6-NEXT: v_sub_i32_e32 v16, vcc, v17, v16 2015; GFX6-NEXT: v_min_i32_e32 v17, -1, v15 2016; GFX6-NEXT: v_sub_i32_e32 v17, vcc, v17, v18 2017; GFX6-NEXT: v_max_i32_e32 v16, v16, v31 2018; GFX6-NEXT: v_min_i32_e32 v16, v16, v17 2019; GFX6-NEXT: v_sub_i32_e32 v15, vcc, v15, v16 2020; GFX6-NEXT: s_setpc_b64 s[30:31] 2021; 2022; GFX8-LABEL: v_ssubsat_v16i32: 2023; GFX8: ; %bb.0: 2024; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2025; GFX8-NEXT: s_brev_b32 s4, -2 2026; GFX8-NEXT: v_max_i32_e32 v32, -1, v0 2027; GFX8-NEXT: v_subrev_u32_e32 v32, vcc, s4, v32 2028; GFX8-NEXT: v_max_i32_e32 v16, v32, v16 2029; GFX8-NEXT: s_brev_b32 s5, 1 2030; GFX8-NEXT: v_min_i32_e32 v32, -1, v0 2031; GFX8-NEXT: v_subrev_u32_e32 v32, vcc, s5, v32 2032; GFX8-NEXT: v_min_i32_e32 v16, v16, v32 2033; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v16 2034; GFX8-NEXT: v_max_i32_e32 v16, -1, v1 2035; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, s4, v16 2036; GFX8-NEXT: v_max_i32_e32 v16, v16, v17 2037; GFX8-NEXT: v_min_i32_e32 v17, -1, v1 2038; GFX8-NEXT: v_subrev_u32_e32 v17, vcc, s5, v17 2039; GFX8-NEXT: v_min_i32_e32 v16, v16, v17 2040; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v1, v16 2041; GFX8-NEXT: v_max_i32_e32 v16, -1, v2 2042; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, s4, v16 2043; GFX8-NEXT: v_min_i32_e32 v17, -1, v2 2044; GFX8-NEXT: v_max_i32_e32 v16, v16, v18 2045; GFX8-NEXT: v_subrev_u32_e32 v17, vcc, s5, v17 2046; GFX8-NEXT: v_min_i32_e32 v16, v16, v17 2047; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v16 2048; GFX8-NEXT: v_bfrev_b32_e32 v16, -2 2049; GFX8-NEXT: v_max_i32_e32 v17, -1, v3 2050; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v16 2051; GFX8-NEXT: v_max_i32_e32 v17, v17, v19 2052; GFX8-NEXT: v_bfrev_b32_e32 v18, 1 2053; 
GFX8-NEXT: v_min_i32_e32 v19, -1, v3 2054; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v18 2055; GFX8-NEXT: v_min_i32_e32 v17, v17, v19 2056; GFX8-NEXT: v_sub_u32_e32 v3, vcc, v3, v17 2057; GFX8-NEXT: v_max_i32_e32 v17, -1, v4 2058; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v16 2059; GFX8-NEXT: v_min_i32_e32 v19, -1, v4 2060; GFX8-NEXT: v_max_i32_e32 v17, v17, v20 2061; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v18 2062; GFX8-NEXT: v_min_i32_e32 v17, v17, v19 2063; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v17 2064; GFX8-NEXT: v_max_i32_e32 v17, -1, v5 2065; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v16 2066; GFX8-NEXT: v_min_i32_e32 v19, -1, v5 2067; GFX8-NEXT: v_max_i32_e32 v17, v17, v21 2068; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v18 2069; GFX8-NEXT: v_min_i32_e32 v17, v17, v19 2070; GFX8-NEXT: v_sub_u32_e32 v5, vcc, v5, v17 2071; GFX8-NEXT: v_max_i32_e32 v17, -1, v6 2072; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v16 2073; GFX8-NEXT: v_min_i32_e32 v19, -1, v6 2074; GFX8-NEXT: v_max_i32_e32 v17, v17, v22 2075; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v18 2076; GFX8-NEXT: v_min_i32_e32 v17, v17, v19 2077; GFX8-NEXT: v_sub_u32_e32 v6, vcc, v6, v17 2078; GFX8-NEXT: v_max_i32_e32 v17, -1, v7 2079; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v16 2080; GFX8-NEXT: v_min_i32_e32 v19, -1, v7 2081; GFX8-NEXT: v_max_i32_e32 v17, v17, v23 2082; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v18 2083; GFX8-NEXT: v_min_i32_e32 v17, v17, v19 2084; GFX8-NEXT: v_sub_u32_e32 v7, vcc, v7, v17 2085; GFX8-NEXT: v_max_i32_e32 v17, -1, v8 2086; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v16 2087; GFX8-NEXT: v_min_i32_e32 v19, -1, v8 2088; GFX8-NEXT: v_max_i32_e32 v17, v17, v24 2089; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v18 2090; GFX8-NEXT: v_min_i32_e32 v17, v17, v19 2091; GFX8-NEXT: v_sub_u32_e32 v8, vcc, v8, v17 2092; GFX8-NEXT: v_max_i32_e32 v17, -1, v9 2093; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v16 2094; GFX8-NEXT: v_min_i32_e32 v19, -1, v9 2095; GFX8-NEXT: v_max_i32_e32 v17, v17, v25 2096; GFX8-NEXT: 
v_sub_u32_e32 v19, vcc, v19, v18 2097; GFX8-NEXT: v_min_i32_e32 v17, v17, v19 2098; GFX8-NEXT: v_sub_u32_e32 v9, vcc, v9, v17 2099; GFX8-NEXT: v_max_i32_e32 v17, -1, v10 2100; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v16 2101; GFX8-NEXT: v_min_i32_e32 v19, -1, v10 2102; GFX8-NEXT: v_max_i32_e32 v17, v17, v26 2103; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v18 2104; GFX8-NEXT: v_min_i32_e32 v17, v17, v19 2105; GFX8-NEXT: v_sub_u32_e32 v10, vcc, v10, v17 2106; GFX8-NEXT: v_max_i32_e32 v17, -1, v11 2107; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v16 2108; GFX8-NEXT: v_min_i32_e32 v19, -1, v11 2109; GFX8-NEXT: v_max_i32_e32 v17, v17, v27 2110; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v18 2111; GFX8-NEXT: v_min_i32_e32 v17, v17, v19 2112; GFX8-NEXT: v_sub_u32_e32 v11, vcc, v11, v17 2113; GFX8-NEXT: v_max_i32_e32 v17, -1, v12 2114; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v16 2115; GFX8-NEXT: v_min_i32_e32 v19, -1, v12 2116; GFX8-NEXT: v_max_i32_e32 v17, v17, v28 2117; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v18 2118; GFX8-NEXT: v_min_i32_e32 v17, v17, v19 2119; GFX8-NEXT: v_sub_u32_e32 v12, vcc, v12, v17 2120; GFX8-NEXT: v_max_i32_e32 v17, -1, v13 2121; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v16 2122; GFX8-NEXT: v_min_i32_e32 v19, -1, v13 2123; GFX8-NEXT: v_max_i32_e32 v17, v17, v29 2124; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v18 2125; GFX8-NEXT: v_min_i32_e32 v17, v17, v19 2126; GFX8-NEXT: v_sub_u32_e32 v13, vcc, v13, v17 2127; GFX8-NEXT: v_max_i32_e32 v17, -1, v14 2128; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v16 2129; GFX8-NEXT: v_min_i32_e32 v19, -1, v14 2130; GFX8-NEXT: v_max_i32_e32 v17, v17, v30 2131; GFX8-NEXT: v_sub_u32_e32 v19, vcc, v19, v18 2132; GFX8-NEXT: v_min_i32_e32 v17, v17, v19 2133; GFX8-NEXT: v_sub_u32_e32 v14, vcc, v14, v17 2134; GFX8-NEXT: v_max_i32_e32 v17, -1, v15 2135; GFX8-NEXT: v_sub_u32_e32 v16, vcc, v17, v16 2136; GFX8-NEXT: v_min_i32_e32 v17, -1, v15 2137; GFX8-NEXT: v_sub_u32_e32 v17, vcc, v17, v18 2138; GFX8-NEXT: v_max_i32_e32 v16, v16, v31 
2139; GFX8-NEXT: v_min_i32_e32 v16, v16, v17 2140; GFX8-NEXT: v_sub_u32_e32 v15, vcc, v15, v16 2141; GFX8-NEXT: s_setpc_b64 s[30:31] 2142; 2143; GFX9-LABEL: v_ssubsat_v16i32: 2144; GFX9: ; %bb.0: 2145; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2146; GFX9-NEXT: v_sub_i32 v0, v0, v16 clamp 2147; GFX9-NEXT: v_sub_i32 v1, v1, v17 clamp 2148; GFX9-NEXT: v_sub_i32 v2, v2, v18 clamp 2149; GFX9-NEXT: v_sub_i32 v3, v3, v19 clamp 2150; GFX9-NEXT: v_sub_i32 v4, v4, v20 clamp 2151; GFX9-NEXT: v_sub_i32 v5, v5, v21 clamp 2152; GFX9-NEXT: v_sub_i32 v6, v6, v22 clamp 2153; GFX9-NEXT: v_sub_i32 v7, v7, v23 clamp 2154; GFX9-NEXT: v_sub_i32 v8, v8, v24 clamp 2155; GFX9-NEXT: v_sub_i32 v9, v9, v25 clamp 2156; GFX9-NEXT: v_sub_i32 v10, v10, v26 clamp 2157; GFX9-NEXT: v_sub_i32 v11, v11, v27 clamp 2158; GFX9-NEXT: v_sub_i32 v12, v12, v28 clamp 2159; GFX9-NEXT: v_sub_i32 v13, v13, v29 clamp 2160; GFX9-NEXT: v_sub_i32 v14, v14, v30 clamp 2161; GFX9-NEXT: v_sub_i32 v15, v15, v31 clamp 2162; GFX9-NEXT: s_setpc_b64 s[30:31] 2163; 2164; GFX10-LABEL: v_ssubsat_v16i32: 2165; GFX10: ; %bb.0: 2166; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2167; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2168; GFX10-NEXT: v_sub_nc_i32 v0, v0, v16 clamp 2169; GFX10-NEXT: v_sub_nc_i32 v1, v1, v17 clamp 2170; GFX10-NEXT: v_sub_nc_i32 v2, v2, v18 clamp 2171; GFX10-NEXT: v_sub_nc_i32 v3, v3, v19 clamp 2172; GFX10-NEXT: v_sub_nc_i32 v4, v4, v20 clamp 2173; GFX10-NEXT: v_sub_nc_i32 v5, v5, v21 clamp 2174; GFX10-NEXT: v_sub_nc_i32 v6, v6, v22 clamp 2175; GFX10-NEXT: v_sub_nc_i32 v7, v7, v23 clamp 2176; GFX10-NEXT: v_sub_nc_i32 v8, v8, v24 clamp 2177; GFX10-NEXT: v_sub_nc_i32 v9, v9, v25 clamp 2178; GFX10-NEXT: v_sub_nc_i32 v10, v10, v26 clamp 2179; GFX10-NEXT: v_sub_nc_i32 v11, v11, v27 clamp 2180; GFX10-NEXT: v_sub_nc_i32 v12, v12, v28 clamp 2181; GFX10-NEXT: v_sub_nc_i32 v13, v13, v29 clamp 2182; GFX10-NEXT: v_sub_nc_i32 v14, v14, v30 clamp 2183; GFX10-NEXT: v_sub_nc_i32 v15, v15, v31 clamp 2184; 
GFX10-NEXT: s_setpc_b64 s[30:31] 2185 %result = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs) 2186 ret <16 x i32> %result 2187} 2188 2189define amdgpu_ps <16 x i32> @s_ssubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32> inreg %rhs) { 2190; GFX6-LABEL: s_ssubsat_v16i32: 2191; GFX6: ; %bb.0: 2192; GFX6-NEXT: s_brev_b32 s32, -2 2193; GFX6-NEXT: s_max_i32 s34, s0, -1 2194; GFX6-NEXT: s_brev_b32 s33, 1 2195; GFX6-NEXT: s_sub_i32 s34, s34, s32 2196; GFX6-NEXT: s_min_i32 s35, s0, -1 2197; GFX6-NEXT: s_sub_i32 s35, s35, s33 2198; GFX6-NEXT: s_max_i32 s16, s34, s16 2199; GFX6-NEXT: s_min_i32 s16, s16, s35 2200; GFX6-NEXT: s_sub_i32 s0, s0, s16 2201; GFX6-NEXT: s_max_i32 s16, s1, -1 2202; GFX6-NEXT: s_sub_i32 s16, s16, s32 2203; GFX6-NEXT: s_min_i32 s34, s1, -1 2204; GFX6-NEXT: s_sub_i32 s34, s34, s33 2205; GFX6-NEXT: s_max_i32 s16, s16, s17 2206; GFX6-NEXT: s_min_i32 s16, s16, s34 2207; GFX6-NEXT: s_sub_i32 s1, s1, s16 2208; GFX6-NEXT: s_max_i32 s16, s2, -1 2209; GFX6-NEXT: s_sub_i32 s16, s16, s32 2210; GFX6-NEXT: s_min_i32 s17, s2, -1 2211; GFX6-NEXT: s_sub_i32 s17, s17, s33 2212; GFX6-NEXT: s_max_i32 s16, s16, s18 2213; GFX6-NEXT: s_min_i32 s16, s16, s17 2214; GFX6-NEXT: s_sub_i32 s2, s2, s16 2215; GFX6-NEXT: s_max_i32 s16, s3, -1 2216; GFX6-NEXT: s_sub_i32 s16, s16, s32 2217; GFX6-NEXT: s_min_i32 s17, s3, -1 2218; GFX6-NEXT: s_sub_i32 s17, s17, s33 2219; GFX6-NEXT: s_max_i32 s16, s16, s19 2220; GFX6-NEXT: s_min_i32 s16, s16, s17 2221; GFX6-NEXT: s_sub_i32 s3, s3, s16 2222; GFX6-NEXT: s_max_i32 s16, s4, -1 2223; GFX6-NEXT: s_sub_i32 s16, s16, s32 2224; GFX6-NEXT: s_min_i32 s17, s4, -1 2225; GFX6-NEXT: s_sub_i32 s17, s17, s33 2226; GFX6-NEXT: s_max_i32 s16, s16, s20 2227; GFX6-NEXT: s_min_i32 s16, s16, s17 2228; GFX6-NEXT: s_sub_i32 s4, s4, s16 2229; GFX6-NEXT: s_max_i32 s16, s5, -1 2230; GFX6-NEXT: s_sub_i32 s16, s16, s32 2231; GFX6-NEXT: s_min_i32 s17, s5, -1 2232; GFX6-NEXT: s_sub_i32 s17, s17, s33 2233; GFX6-NEXT: s_max_i32 s16, s16, s21 2234; 
GFX6-NEXT: s_min_i32 s16, s16, s17 2235; GFX6-NEXT: s_sub_i32 s5, s5, s16 2236; GFX6-NEXT: s_max_i32 s16, s6, -1 2237; GFX6-NEXT: s_sub_i32 s16, s16, s32 2238; GFX6-NEXT: s_min_i32 s17, s6, -1 2239; GFX6-NEXT: s_sub_i32 s17, s17, s33 2240; GFX6-NEXT: s_max_i32 s16, s16, s22 2241; GFX6-NEXT: s_min_i32 s16, s16, s17 2242; GFX6-NEXT: s_sub_i32 s6, s6, s16 2243; GFX6-NEXT: s_max_i32 s16, s7, -1 2244; GFX6-NEXT: s_sub_i32 s16, s16, s32 2245; GFX6-NEXT: s_min_i32 s17, s7, -1 2246; GFX6-NEXT: s_sub_i32 s17, s17, s33 2247; GFX6-NEXT: s_max_i32 s16, s16, s23 2248; GFX6-NEXT: s_min_i32 s16, s16, s17 2249; GFX6-NEXT: s_sub_i32 s7, s7, s16 2250; GFX6-NEXT: s_max_i32 s16, s8, -1 2251; GFX6-NEXT: s_sub_i32 s16, s16, s32 2252; GFX6-NEXT: s_min_i32 s17, s8, -1 2253; GFX6-NEXT: s_sub_i32 s17, s17, s33 2254; GFX6-NEXT: s_max_i32 s16, s16, s24 2255; GFX6-NEXT: s_min_i32 s16, s16, s17 2256; GFX6-NEXT: s_sub_i32 s8, s8, s16 2257; GFX6-NEXT: s_max_i32 s16, s9, -1 2258; GFX6-NEXT: s_sub_i32 s16, s16, s32 2259; GFX6-NEXT: s_min_i32 s17, s9, -1 2260; GFX6-NEXT: s_sub_i32 s17, s17, s33 2261; GFX6-NEXT: s_max_i32 s16, s16, s25 2262; GFX6-NEXT: s_min_i32 s16, s16, s17 2263; GFX6-NEXT: s_sub_i32 s9, s9, s16 2264; GFX6-NEXT: s_max_i32 s16, s10, -1 2265; GFX6-NEXT: s_sub_i32 s16, s16, s32 2266; GFX6-NEXT: s_min_i32 s17, s10, -1 2267; GFX6-NEXT: s_sub_i32 s17, s17, s33 2268; GFX6-NEXT: s_max_i32 s16, s16, s26 2269; GFX6-NEXT: s_min_i32 s16, s16, s17 2270; GFX6-NEXT: s_sub_i32 s10, s10, s16 2271; GFX6-NEXT: s_max_i32 s16, s11, -1 2272; GFX6-NEXT: s_sub_i32 s16, s16, s32 2273; GFX6-NEXT: s_min_i32 s17, s11, -1 2274; GFX6-NEXT: s_sub_i32 s17, s17, s33 2275; GFX6-NEXT: s_max_i32 s16, s16, s27 2276; GFX6-NEXT: s_min_i32 s16, s16, s17 2277; GFX6-NEXT: s_sub_i32 s11, s11, s16 2278; GFX6-NEXT: s_max_i32 s16, s12, -1 2279; GFX6-NEXT: s_sub_i32 s16, s16, s32 2280; GFX6-NEXT: s_min_i32 s17, s12, -1 2281; GFX6-NEXT: s_sub_i32 s17, s17, s33 2282; GFX6-NEXT: s_max_i32 s16, s16, s28 2283; GFX6-NEXT: s_min_i32 
s16, s16, s17 2284; GFX6-NEXT: s_sub_i32 s12, s12, s16 2285; GFX6-NEXT: s_max_i32 s16, s13, -1 2286; GFX6-NEXT: s_sub_i32 s16, s16, s32 2287; GFX6-NEXT: s_min_i32 s17, s13, -1 2288; GFX6-NEXT: s_sub_i32 s17, s17, s33 2289; GFX6-NEXT: s_max_i32 s16, s16, s29 2290; GFX6-NEXT: s_min_i32 s16, s16, s17 2291; GFX6-NEXT: s_sub_i32 s13, s13, s16 2292; GFX6-NEXT: s_max_i32 s16, s14, -1 2293; GFX6-NEXT: s_sub_i32 s16, s16, s32 2294; GFX6-NEXT: s_min_i32 s17, s14, -1 2295; GFX6-NEXT: s_sub_i32 s17, s17, s33 2296; GFX6-NEXT: s_max_i32 s16, s16, s30 2297; GFX6-NEXT: s_min_i32 s16, s16, s17 2298; GFX6-NEXT: s_sub_i32 s14, s14, s16 2299; GFX6-NEXT: s_max_i32 s16, s15, -1 2300; GFX6-NEXT: s_sub_i32 s16, s16, s32 2301; GFX6-NEXT: s_min_i32 s17, s15, -1 2302; GFX6-NEXT: s_sub_i32 s17, s17, s33 2303; GFX6-NEXT: s_max_i32 s16, s16, s31 2304; GFX6-NEXT: s_min_i32 s16, s16, s17 2305; GFX6-NEXT: s_sub_i32 s15, s15, s16 2306; GFX6-NEXT: ; return to shader part epilog 2307; 2308; GFX8-LABEL: s_ssubsat_v16i32: 2309; GFX8: ; %bb.0: 2310; GFX8-NEXT: s_brev_b32 s32, -2 2311; GFX8-NEXT: s_max_i32 s34, s0, -1 2312; GFX8-NEXT: s_brev_b32 s33, 1 2313; GFX8-NEXT: s_sub_i32 s34, s34, s32 2314; GFX8-NEXT: s_min_i32 s35, s0, -1 2315; GFX8-NEXT: s_sub_i32 s35, s35, s33 2316; GFX8-NEXT: s_max_i32 s16, s34, s16 2317; GFX8-NEXT: s_min_i32 s16, s16, s35 2318; GFX8-NEXT: s_sub_i32 s0, s0, s16 2319; GFX8-NEXT: s_max_i32 s16, s1, -1 2320; GFX8-NEXT: s_sub_i32 s16, s16, s32 2321; GFX8-NEXT: s_min_i32 s34, s1, -1 2322; GFX8-NEXT: s_sub_i32 s34, s34, s33 2323; GFX8-NEXT: s_max_i32 s16, s16, s17 2324; GFX8-NEXT: s_min_i32 s16, s16, s34 2325; GFX8-NEXT: s_sub_i32 s1, s1, s16 2326; GFX8-NEXT: s_max_i32 s16, s2, -1 2327; GFX8-NEXT: s_sub_i32 s16, s16, s32 2328; GFX8-NEXT: s_min_i32 s17, s2, -1 2329; GFX8-NEXT: s_sub_i32 s17, s17, s33 2330; GFX8-NEXT: s_max_i32 s16, s16, s18 2331; GFX8-NEXT: s_min_i32 s16, s16, s17 2332; GFX8-NEXT: s_sub_i32 s2, s2, s16 2333; GFX8-NEXT: s_max_i32 s16, s3, -1 2334; GFX8-NEXT: 
s_sub_i32 s16, s16, s32 2335; GFX8-NEXT: s_min_i32 s17, s3, -1 2336; GFX8-NEXT: s_sub_i32 s17, s17, s33 2337; GFX8-NEXT: s_max_i32 s16, s16, s19 2338; GFX8-NEXT: s_min_i32 s16, s16, s17 2339; GFX8-NEXT: s_sub_i32 s3, s3, s16 2340; GFX8-NEXT: s_max_i32 s16, s4, -1 2341; GFX8-NEXT: s_sub_i32 s16, s16, s32 2342; GFX8-NEXT: s_min_i32 s17, s4, -1 2343; GFX8-NEXT: s_sub_i32 s17, s17, s33 2344; GFX8-NEXT: s_max_i32 s16, s16, s20 2345; GFX8-NEXT: s_min_i32 s16, s16, s17 2346; GFX8-NEXT: s_sub_i32 s4, s4, s16 2347; GFX8-NEXT: s_max_i32 s16, s5, -1 2348; GFX8-NEXT: s_sub_i32 s16, s16, s32 2349; GFX8-NEXT: s_min_i32 s17, s5, -1 2350; GFX8-NEXT: s_sub_i32 s17, s17, s33 2351; GFX8-NEXT: s_max_i32 s16, s16, s21 2352; GFX8-NEXT: s_min_i32 s16, s16, s17 2353; GFX8-NEXT: s_sub_i32 s5, s5, s16 2354; GFX8-NEXT: s_max_i32 s16, s6, -1 2355; GFX8-NEXT: s_sub_i32 s16, s16, s32 2356; GFX8-NEXT: s_min_i32 s17, s6, -1 2357; GFX8-NEXT: s_sub_i32 s17, s17, s33 2358; GFX8-NEXT: s_max_i32 s16, s16, s22 2359; GFX8-NEXT: s_min_i32 s16, s16, s17 2360; GFX8-NEXT: s_sub_i32 s6, s6, s16 2361; GFX8-NEXT: s_max_i32 s16, s7, -1 2362; GFX8-NEXT: s_sub_i32 s16, s16, s32 2363; GFX8-NEXT: s_min_i32 s17, s7, -1 2364; GFX8-NEXT: s_sub_i32 s17, s17, s33 2365; GFX8-NEXT: s_max_i32 s16, s16, s23 2366; GFX8-NEXT: s_min_i32 s16, s16, s17 2367; GFX8-NEXT: s_sub_i32 s7, s7, s16 2368; GFX8-NEXT: s_max_i32 s16, s8, -1 2369; GFX8-NEXT: s_sub_i32 s16, s16, s32 2370; GFX8-NEXT: s_min_i32 s17, s8, -1 2371; GFX8-NEXT: s_sub_i32 s17, s17, s33 2372; GFX8-NEXT: s_max_i32 s16, s16, s24 2373; GFX8-NEXT: s_min_i32 s16, s16, s17 2374; GFX8-NEXT: s_sub_i32 s8, s8, s16 2375; GFX8-NEXT: s_max_i32 s16, s9, -1 2376; GFX8-NEXT: s_sub_i32 s16, s16, s32 2377; GFX8-NEXT: s_min_i32 s17, s9, -1 2378; GFX8-NEXT: s_sub_i32 s17, s17, s33 2379; GFX8-NEXT: s_max_i32 s16, s16, s25 2380; GFX8-NEXT: s_min_i32 s16, s16, s17 2381; GFX8-NEXT: s_sub_i32 s9, s9, s16 2382; GFX8-NEXT: s_max_i32 s16, s10, -1 2383; GFX8-NEXT: s_sub_i32 s16, s16, s32 2384; 
GFX8-NEXT: s_min_i32 s17, s10, -1 2385; GFX8-NEXT: s_sub_i32 s17, s17, s33 2386; GFX8-NEXT: s_max_i32 s16, s16, s26 2387; GFX8-NEXT: s_min_i32 s16, s16, s17 2388; GFX8-NEXT: s_sub_i32 s10, s10, s16 2389; GFX8-NEXT: s_max_i32 s16, s11, -1 2390; GFX8-NEXT: s_sub_i32 s16, s16, s32 2391; GFX8-NEXT: s_min_i32 s17, s11, -1 2392; GFX8-NEXT: s_sub_i32 s17, s17, s33 2393; GFX8-NEXT: s_max_i32 s16, s16, s27 2394; GFX8-NEXT: s_min_i32 s16, s16, s17 2395; GFX8-NEXT: s_sub_i32 s11, s11, s16 2396; GFX8-NEXT: s_max_i32 s16, s12, -1 2397; GFX8-NEXT: s_sub_i32 s16, s16, s32 2398; GFX8-NEXT: s_min_i32 s17, s12, -1 2399; GFX8-NEXT: s_sub_i32 s17, s17, s33 2400; GFX8-NEXT: s_max_i32 s16, s16, s28 2401; GFX8-NEXT: s_min_i32 s16, s16, s17 2402; GFX8-NEXT: s_sub_i32 s12, s12, s16 2403; GFX8-NEXT: s_max_i32 s16, s13, -1 2404; GFX8-NEXT: s_sub_i32 s16, s16, s32 2405; GFX8-NEXT: s_min_i32 s17, s13, -1 2406; GFX8-NEXT: s_sub_i32 s17, s17, s33 2407; GFX8-NEXT: s_max_i32 s16, s16, s29 2408; GFX8-NEXT: s_min_i32 s16, s16, s17 2409; GFX8-NEXT: s_sub_i32 s13, s13, s16 2410; GFX8-NEXT: s_max_i32 s16, s14, -1 2411; GFX8-NEXT: s_sub_i32 s16, s16, s32 2412; GFX8-NEXT: s_min_i32 s17, s14, -1 2413; GFX8-NEXT: s_sub_i32 s17, s17, s33 2414; GFX8-NEXT: s_max_i32 s16, s16, s30 2415; GFX8-NEXT: s_min_i32 s16, s16, s17 2416; GFX8-NEXT: s_sub_i32 s14, s14, s16 2417; GFX8-NEXT: s_max_i32 s16, s15, -1 2418; GFX8-NEXT: s_sub_i32 s16, s16, s32 2419; GFX8-NEXT: s_min_i32 s17, s15, -1 2420; GFX8-NEXT: s_sub_i32 s17, s17, s33 2421; GFX8-NEXT: s_max_i32 s16, s16, s31 2422; GFX8-NEXT: s_min_i32 s16, s16, s17 2423; GFX8-NEXT: s_sub_i32 s15, s15, s16 2424; GFX8-NEXT: ; return to shader part epilog 2425; 2426; GFX9-LABEL: s_ssubsat_v16i32: 2427; GFX9: ; %bb.0: 2428; GFX9-NEXT: v_mov_b32_e32 v0, s16 2429; GFX9-NEXT: v_mov_b32_e32 v1, s17 2430; GFX9-NEXT: v_mov_b32_e32 v2, s18 2431; GFX9-NEXT: v_mov_b32_e32 v3, s19 2432; GFX9-NEXT: v_mov_b32_e32 v4, s20 2433; GFX9-NEXT: v_mov_b32_e32 v5, s21 2434; GFX9-NEXT: v_mov_b32_e32 
v6, s22 2435; GFX9-NEXT: v_mov_b32_e32 v7, s23 2436; GFX9-NEXT: v_mov_b32_e32 v8, s24 2437; GFX9-NEXT: v_mov_b32_e32 v9, s25 2438; GFX9-NEXT: v_mov_b32_e32 v10, s26 2439; GFX9-NEXT: v_mov_b32_e32 v11, s27 2440; GFX9-NEXT: v_mov_b32_e32 v12, s28 2441; GFX9-NEXT: v_mov_b32_e32 v13, s29 2442; GFX9-NEXT: v_mov_b32_e32 v14, s30 2443; GFX9-NEXT: v_mov_b32_e32 v15, s31 2444; GFX9-NEXT: v_sub_i32 v0, s0, v0 clamp 2445; GFX9-NEXT: v_sub_i32 v1, s1, v1 clamp 2446; GFX9-NEXT: v_sub_i32 v2, s2, v2 clamp 2447; GFX9-NEXT: v_sub_i32 v3, s3, v3 clamp 2448; GFX9-NEXT: v_sub_i32 v4, s4, v4 clamp 2449; GFX9-NEXT: v_sub_i32 v5, s5, v5 clamp 2450; GFX9-NEXT: v_sub_i32 v6, s6, v6 clamp 2451; GFX9-NEXT: v_sub_i32 v7, s7, v7 clamp 2452; GFX9-NEXT: v_sub_i32 v8, s8, v8 clamp 2453; GFX9-NEXT: v_sub_i32 v9, s9, v9 clamp 2454; GFX9-NEXT: v_sub_i32 v10, s10, v10 clamp 2455; GFX9-NEXT: v_sub_i32 v11, s11, v11 clamp 2456; GFX9-NEXT: v_sub_i32 v12, s12, v12 clamp 2457; GFX9-NEXT: v_sub_i32 v13, s13, v13 clamp 2458; GFX9-NEXT: v_sub_i32 v14, s14, v14 clamp 2459; GFX9-NEXT: v_sub_i32 v15, s15, v15 clamp 2460; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2461; GFX9-NEXT: v_readfirstlane_b32 s1, v1 2462; GFX9-NEXT: v_readfirstlane_b32 s2, v2 2463; GFX9-NEXT: v_readfirstlane_b32 s3, v3 2464; GFX9-NEXT: v_readfirstlane_b32 s4, v4 2465; GFX9-NEXT: v_readfirstlane_b32 s5, v5 2466; GFX9-NEXT: v_readfirstlane_b32 s6, v6 2467; GFX9-NEXT: v_readfirstlane_b32 s7, v7 2468; GFX9-NEXT: v_readfirstlane_b32 s8, v8 2469; GFX9-NEXT: v_readfirstlane_b32 s9, v9 2470; GFX9-NEXT: v_readfirstlane_b32 s10, v10 2471; GFX9-NEXT: v_readfirstlane_b32 s11, v11 2472; GFX9-NEXT: v_readfirstlane_b32 s12, v12 2473; GFX9-NEXT: v_readfirstlane_b32 s13, v13 2474; GFX9-NEXT: v_readfirstlane_b32 s14, v14 2475; GFX9-NEXT: v_readfirstlane_b32 s15, v15 2476; GFX9-NEXT: ; return to shader part epilog 2477; 2478; GFX10-LABEL: s_ssubsat_v16i32: 2479; GFX10: ; %bb.0: 2480; GFX10-NEXT: v_sub_nc_i32 v0, s0, s16 clamp 2481; GFX10-NEXT: v_sub_nc_i32 v1, 
s1, s17 clamp 2482; GFX10-NEXT: v_sub_nc_i32 v2, s2, s18 clamp 2483; GFX10-NEXT: v_sub_nc_i32 v3, s3, s19 clamp 2484; GFX10-NEXT: v_sub_nc_i32 v4, s4, s20 clamp 2485; GFX10-NEXT: v_sub_nc_i32 v5, s5, s21 clamp 2486; GFX10-NEXT: v_sub_nc_i32 v6, s6, s22 clamp 2487; GFX10-NEXT: v_sub_nc_i32 v7, s7, s23 clamp 2488; GFX10-NEXT: v_sub_nc_i32 v8, s8, s24 clamp 2489; GFX10-NEXT: v_sub_nc_i32 v9, s9, s25 clamp 2490; GFX10-NEXT: v_sub_nc_i32 v10, s10, s26 clamp 2491; GFX10-NEXT: v_sub_nc_i32 v11, s11, s27 clamp 2492; GFX10-NEXT: v_sub_nc_i32 v12, s12, s28 clamp 2493; GFX10-NEXT: v_sub_nc_i32 v13, s13, s29 clamp 2494; GFX10-NEXT: v_sub_nc_i32 v14, s14, s30 clamp 2495; GFX10-NEXT: v_sub_nc_i32 v15, s15, s31 clamp 2496; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2497; GFX10-NEXT: v_readfirstlane_b32 s1, v1 2498; GFX10-NEXT: v_readfirstlane_b32 s2, v2 2499; GFX10-NEXT: v_readfirstlane_b32 s3, v3 2500; GFX10-NEXT: v_readfirstlane_b32 s4, v4 2501; GFX10-NEXT: v_readfirstlane_b32 s5, v5 2502; GFX10-NEXT: v_readfirstlane_b32 s6, v6 2503; GFX10-NEXT: v_readfirstlane_b32 s7, v7 2504; GFX10-NEXT: v_readfirstlane_b32 s8, v8 2505; GFX10-NEXT: v_readfirstlane_b32 s9, v9 2506; GFX10-NEXT: v_readfirstlane_b32 s10, v10 2507; GFX10-NEXT: v_readfirstlane_b32 s11, v11 2508; GFX10-NEXT: v_readfirstlane_b32 s12, v12 2509; GFX10-NEXT: v_readfirstlane_b32 s13, v13 2510; GFX10-NEXT: v_readfirstlane_b32 s14, v14 2511; GFX10-NEXT: v_readfirstlane_b32 s15, v15 2512; GFX10-NEXT: ; return to shader part epilog 2513 %result = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs) 2514 ret <16 x i32> %result 2515} 2516 2517define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) { 2518; GFX6-LABEL: v_ssubsat_i16: 2519; GFX6: ; %bb.0: 2520; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2521; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2522; GFX6-NEXT: v_max_i32_e32 v2, -1, v0 2523; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2524; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x7fffffff, v2 2525; GFX6-NEXT: 
v_min_i32_e32 v3, -1, v0 2526; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 0x80000000, v3 2527; GFX6-NEXT: v_max_i32_e32 v1, v2, v1 2528; GFX6-NEXT: v_min_i32_e32 v1, v1, v3 2529; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 2530; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0 2531; GFX6-NEXT: s_setpc_b64 s[30:31] 2532; 2533; GFX8-LABEL: v_ssubsat_i16: 2534; GFX8: ; %bb.0: 2535; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2536; GFX8-NEXT: v_max_i16_e32 v2, -1, v0 2537; GFX8-NEXT: v_subrev_u16_e32 v2, 0x7fff, v2 2538; GFX8-NEXT: v_min_i16_e32 v3, -1, v0 2539; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3 2540; GFX8-NEXT: v_max_i16_e32 v1, v2, v1 2541; GFX8-NEXT: v_min_i16_e32 v1, v1, v3 2542; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1 2543; GFX8-NEXT: s_setpc_b64 s[30:31] 2544; 2545; GFX9-LABEL: v_ssubsat_i16: 2546; GFX9: ; %bb.0: 2547; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2548; GFX9-NEXT: v_sub_i16 v0, v0, v1 clamp 2549; GFX9-NEXT: s_setpc_b64 s[30:31] 2550; 2551; GFX10-LABEL: v_ssubsat_i16: 2552; GFX10: ; %bb.0: 2553; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2554; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2555; GFX10-NEXT: v_sub_nc_i16 v0, v0, v1 clamp 2556; GFX10-NEXT: s_setpc_b64 s[30:31] 2557 %result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs) 2558 ret i16 %result 2559} 2560 2561define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) { 2562; GFX6-LABEL: s_ssubsat_i16: 2563; GFX6: ; %bb.0: 2564; GFX6-NEXT: s_lshl_b32 s0, s0, 16 2565; GFX6-NEXT: s_max_i32 s2, s0, -1 2566; GFX6-NEXT: s_lshl_b32 s1, s1, 16 2567; GFX6-NEXT: s_sub_i32 s2, s2, 0x7fffffff 2568; GFX6-NEXT: s_min_i32 s3, s0, -1 2569; GFX6-NEXT: s_sub_i32 s3, s3, 0x80000000 2570; GFX6-NEXT: s_max_i32 s1, s2, s1 2571; GFX6-NEXT: s_min_i32 s1, s1, s3 2572; GFX6-NEXT: s_sub_i32 s0, s0, s1 2573; GFX6-NEXT: s_ashr_i32 s0, s0, 16 2574; GFX6-NEXT: ; return to shader part epilog 2575; 2576; GFX8-LABEL: s_ssubsat_i16: 2577; GFX8: ; %bb.0: 2578; GFX8-NEXT: s_sext_i32_i16 s2, s0 2579; GFX8-NEXT: 
s_sext_i32_i16 s3, -1 2580; GFX8-NEXT: s_max_i32 s4, s2, s3 2581; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff 2582; GFX8-NEXT: s_min_i32 s2, s2, s3 2583; GFX8-NEXT: s_sext_i32_i16 s3, s4 2584; GFX8-NEXT: s_sext_i32_i16 s1, s1 2585; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000 2586; GFX8-NEXT: s_max_i32 s1, s3, s1 2587; GFX8-NEXT: s_sext_i32_i16 s1, s1 2588; GFX8-NEXT: s_sext_i32_i16 s2, s2 2589; GFX8-NEXT: s_min_i32 s1, s1, s2 2590; GFX8-NEXT: s_sub_i32 s0, s0, s1 2591; GFX8-NEXT: ; return to shader part epilog 2592; 2593; GFX9-LABEL: s_ssubsat_i16: 2594; GFX9: ; %bb.0: 2595; GFX9-NEXT: v_mov_b32_e32 v0, s1 2596; GFX9-NEXT: v_sub_i16 v0, s0, v0 clamp 2597; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2598; GFX9-NEXT: ; return to shader part epilog 2599; 2600; GFX10-LABEL: s_ssubsat_i16: 2601; GFX10: ; %bb.0: 2602; GFX10-NEXT: v_sub_nc_i16 v0, s0, s1 clamp 2603; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2604; GFX10-NEXT: ; return to shader part epilog 2605 %result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs) 2606 ret i16 %result 2607} 2608 2609define amdgpu_ps half @ssubsat_i16_sv(i16 inreg %lhs, i16 %rhs) { 2610; GFX6-LABEL: ssubsat_i16_sv: 2611; GFX6: ; %bb.0: 2612; GFX6-NEXT: s_lshl_b32 s0, s0, 16 2613; GFX6-NEXT: s_max_i32 s1, s0, -1 2614; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2615; GFX6-NEXT: s_sub_i32 s1, s1, 0x7fffffff 2616; GFX6-NEXT: s_min_i32 s2, s0, -1 2617; GFX6-NEXT: s_sub_i32 s2, s2, 0x80000000 2618; GFX6-NEXT: v_max_i32_e32 v0, s1, v0 2619; GFX6-NEXT: v_min_i32_e32 v0, s2, v0 2620; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 2621; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0 2622; GFX6-NEXT: ; return to shader part epilog 2623; 2624; GFX8-LABEL: ssubsat_i16_sv: 2625; GFX8: ; %bb.0: 2626; GFX8-NEXT: s_sext_i32_i16 s1, s0 2627; GFX8-NEXT: s_sext_i32_i16 s2, -1 2628; GFX8-NEXT: s_max_i32 s3, s1, s2 2629; GFX8-NEXT: s_sub_i32 s3, s3, 0x7fff 2630; GFX8-NEXT: s_min_i32 s1, s1, s2 2631; GFX8-NEXT: s_sub_i32 s1, s1, 0xffff8000 2632; GFX8-NEXT: v_max_i16_e32 v0, s3, v0 2633; 
GFX8-NEXT: v_min_i16_e32 v0, s1, v0 2634; GFX8-NEXT: v_sub_u16_e32 v0, s0, v0 2635; GFX8-NEXT: ; return to shader part epilog 2636; 2637; GFX9-LABEL: ssubsat_i16_sv: 2638; GFX9: ; %bb.0: 2639; GFX9-NEXT: v_sub_i16 v0, s0, v0 clamp 2640; GFX9-NEXT: ; return to shader part epilog 2641; 2642; GFX10-LABEL: ssubsat_i16_sv: 2643; GFX10: ; %bb.0: 2644; GFX10-NEXT: v_sub_nc_i16 v0, s0, v0 clamp 2645; GFX10-NEXT: ; return to shader part epilog 2646 %result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs) 2647 %cast = bitcast i16 %result to half 2648 ret half %cast 2649} 2650 2651define amdgpu_ps half @ssubsat_i16_vs(i16 %lhs, i16 inreg %rhs) { 2652; GFX6-LABEL: ssubsat_i16_vs: 2653; GFX6: ; %bb.0: 2654; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2655; GFX6-NEXT: v_max_i32_e32 v1, -1, v0 2656; GFX6-NEXT: s_lshl_b32 s0, s0, 16 2657; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 0x7fffffff, v1 2658; GFX6-NEXT: v_min_i32_e32 v2, -1, v0 2659; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 0x80000000, v2 2660; GFX6-NEXT: v_max_i32_e32 v1, s0, v1 2661; GFX6-NEXT: v_min_i32_e32 v1, v1, v2 2662; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 2663; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0 2664; GFX6-NEXT: ; return to shader part epilog 2665; 2666; GFX8-LABEL: ssubsat_i16_vs: 2667; GFX8: ; %bb.0: 2668; GFX8-NEXT: v_max_i16_e32 v1, -1, v0 2669; GFX8-NEXT: v_subrev_u16_e32 v1, 0x7fff, v1 2670; GFX8-NEXT: v_min_i16_e32 v2, -1, v0 2671; GFX8-NEXT: v_subrev_u16_e32 v2, 0x8000, v2 2672; GFX8-NEXT: v_max_i16_e32 v1, s0, v1 2673; GFX8-NEXT: v_min_i16_e32 v1, v1, v2 2674; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1 2675; GFX8-NEXT: ; return to shader part epilog 2676; 2677; GFX9-LABEL: ssubsat_i16_vs: 2678; GFX9: ; %bb.0: 2679; GFX9-NEXT: v_sub_i16 v0, v0, s0 clamp 2680; GFX9-NEXT: ; return to shader part epilog 2681; 2682; GFX10-LABEL: ssubsat_i16_vs: 2683; GFX10: ; %bb.0: 2684; GFX10-NEXT: v_sub_nc_i16 v0, v0, s0 clamp 2685; GFX10-NEXT: ; return to shader part epilog 2686 %result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 
%rhs) 2687 %cast = bitcast i16 %result to half 2688 ret half %cast 2689} 2690 2691define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) { 2692; GFX6-LABEL: v_ssubsat_v2i16: 2693; GFX6: ; %bb.0: 2694; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2695; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2696; GFX6-NEXT: s_brev_b32 s4, -2 2697; GFX6-NEXT: v_max_i32_e32 v4, -1, v0 2698; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2699; GFX6-NEXT: s_brev_b32 s5, 1 2700; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s4, v4 2701; GFX6-NEXT: v_min_i32_e32 v5, -1, v0 2702; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s5, v5 2703; GFX6-NEXT: v_max_i32_e32 v2, v4, v2 2704; GFX6-NEXT: v_min_i32_e32 v2, v2, v5 2705; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2706; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 2707; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3 2708; GFX6-NEXT: v_max_i32_e32 v3, -1, v1 2709; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s4, v3 2710; GFX6-NEXT: v_min_i32_e32 v4, -1, v1 2711; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s5, v4 2712; GFX6-NEXT: v_max_i32_e32 v2, v3, v2 2713; GFX6-NEXT: v_min_i32_e32 v2, v2, v4 2714; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 2715; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0 2716; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1 2717; GFX6-NEXT: s_setpc_b64 s[30:31] 2718; 2719; GFX8-LABEL: v_ssubsat_v2i16: 2720; GFX8: ; %bb.0: 2721; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2722; GFX8-NEXT: s_movk_i32 s4, 0x7fff 2723; GFX8-NEXT: v_max_i16_e32 v3, -1, v0 2724; GFX8-NEXT: s_movk_i32 s5, 0x8000 2725; GFX8-NEXT: v_subrev_u16_e32 v3, s4, v3 2726; GFX8-NEXT: v_min_i16_e32 v4, -1, v0 2727; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0 2728; GFX8-NEXT: v_subrev_u16_e32 v4, s5, v4 2729; GFX8-NEXT: v_max_i16_e32 v3, v3, v1 2730; GFX8-NEXT: v_min_i16_e32 v3, v3, v4 2731; GFX8-NEXT: v_max_i16_e32 v4, -1, v2 2732; GFX8-NEXT: v_subrev_u16_e32 v4, s4, v4 2733; GFX8-NEXT: v_min_i16_e32 v5, -1, v2 2734; GFX8-NEXT: v_subrev_u16_e32 v5, s5, v5 2735; GFX8-NEXT: v_max_i16_sdwa v1, v4, v1 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 2736; GFX8-NEXT: v_min_i16_e32 v1, v1, v5 2737; GFX8-NEXT: v_sub_u16_e32 v0, v0, v3 2738; GFX8-NEXT: v_sub_u16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2739; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2740; GFX8-NEXT: s_setpc_b64 s[30:31] 2741; 2742; GFX9-LABEL: v_ssubsat_v2i16: 2743; GFX9: ; %bb.0: 2744; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2745; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 clamp 2746; GFX9-NEXT: s_setpc_b64 s[30:31] 2747; 2748; GFX10-LABEL: v_ssubsat_v2i16: 2749; GFX10: ; %bb.0: 2750; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2751; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2752; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1 clamp 2753; GFX10-NEXT: s_setpc_b64 s[30:31] 2754 %result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) 2755 ret <2 x i16> %result 2756} 2757 2758define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs) { 2759; GFX6-LABEL: s_ssubsat_v2i16: 2760; GFX6: ; %bb.0: 2761; GFX6-NEXT: s_lshl_b32 s0, s0, 16 2762; GFX6-NEXT: s_brev_b32 s4, -2 2763; GFX6-NEXT: s_max_i32 s6, s0, -1 2764; GFX6-NEXT: s_lshl_b32 s2, s2, 16 2765; GFX6-NEXT: s_brev_b32 s5, 1 2766; GFX6-NEXT: s_sub_i32 s6, s6, s4 2767; GFX6-NEXT: s_min_i32 s7, s0, -1 2768; GFX6-NEXT: s_sub_i32 s7, s7, s5 2769; GFX6-NEXT: s_max_i32 s2, s6, s2 2770; GFX6-NEXT: s_min_i32 s2, s2, s7 2771; GFX6-NEXT: s_lshl_b32 s1, s1, 16 2772; GFX6-NEXT: s_sub_i32 s0, s0, s2 2773; GFX6-NEXT: s_lshl_b32 s2, s3, 16 2774; GFX6-NEXT: s_max_i32 s3, s1, -1 2775; GFX6-NEXT: s_sub_i32 s3, s3, s4 2776; GFX6-NEXT: s_min_i32 s4, s1, -1 2777; GFX6-NEXT: s_sub_i32 s4, s4, s5 2778; GFX6-NEXT: s_max_i32 s2, s3, s2 2779; GFX6-NEXT: s_min_i32 s2, s2, s4 2780; GFX6-NEXT: s_sub_i32 s1, s1, s2 2781; GFX6-NEXT: s_ashr_i32 s1, s1, 16 2782; GFX6-NEXT: s_mov_b32 s2, 0xffff 2783; GFX6-NEXT: s_ashr_i32 s0, s0, 16 2784; GFX6-NEXT: s_and_b32 s1, s1, s2 2785; GFX6-NEXT: s_and_b32 
s0, s0, s2 2786; GFX6-NEXT: s_lshl_b32 s1, s1, 16 2787; GFX6-NEXT: s_or_b32 s0, s0, s1 2788; GFX6-NEXT: ; return to shader part epilog 2789; 2790; GFX8-LABEL: s_ssubsat_v2i16: 2791; GFX8: ; %bb.0: 2792; GFX8-NEXT: s_sext_i32_i16 s6, s0 2793; GFX8-NEXT: s_sext_i32_i16 s7, -1 2794; GFX8-NEXT: s_movk_i32 s4, 0x7fff 2795; GFX8-NEXT: s_max_i32 s8, s6, s7 2796; GFX8-NEXT: s_sub_i32 s8, s8, s4 2797; GFX8-NEXT: s_lshr_b32 s3, s1, 16 2798; GFX8-NEXT: s_movk_i32 s5, 0x8000 2799; GFX8-NEXT: s_min_i32 s6, s6, s7 2800; GFX8-NEXT: s_sext_i32_i16 s8, s8 2801; GFX8-NEXT: s_sext_i32_i16 s1, s1 2802; GFX8-NEXT: s_sub_i32 s6, s6, s5 2803; GFX8-NEXT: s_max_i32 s1, s8, s1 2804; GFX8-NEXT: s_sext_i32_i16 s1, s1 2805; GFX8-NEXT: s_sext_i32_i16 s6, s6 2806; GFX8-NEXT: s_lshr_b32 s2, s0, 16 2807; GFX8-NEXT: s_min_i32 s1, s1, s6 2808; GFX8-NEXT: s_sub_i32 s0, s0, s1 2809; GFX8-NEXT: s_sext_i32_i16 s1, s2 2810; GFX8-NEXT: s_max_i32 s6, s1, s7 2811; GFX8-NEXT: s_sub_i32 s4, s6, s4 2812; GFX8-NEXT: s_min_i32 s1, s1, s7 2813; GFX8-NEXT: s_sext_i32_i16 s4, s4 2814; GFX8-NEXT: s_sext_i32_i16 s3, s3 2815; GFX8-NEXT: s_sub_i32 s1, s1, s5 2816; GFX8-NEXT: s_max_i32 s3, s4, s3 2817; GFX8-NEXT: s_sext_i32_i16 s3, s3 2818; GFX8-NEXT: s_sext_i32_i16 s1, s1 2819; GFX8-NEXT: s_min_i32 s1, s3, s1 2820; GFX8-NEXT: s_sub_i32 s1, s2, s1 2821; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 2822; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 2823; GFX8-NEXT: s_lshl_b32 s1, s1, 16 2824; GFX8-NEXT: s_or_b32 s0, s0, s1 2825; GFX8-NEXT: ; return to shader part epilog 2826; 2827; GFX9-LABEL: s_ssubsat_v2i16: 2828; GFX9: ; %bb.0: 2829; GFX9-NEXT: v_mov_b32_e32 v0, s1 2830; GFX9-NEXT: v_pk_sub_i16 v0, s0, v0 clamp 2831; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2832; GFX9-NEXT: ; return to shader part epilog 2833; 2834; GFX10-LABEL: s_ssubsat_v2i16: 2835; GFX10: ; %bb.0: 2836; GFX10-NEXT: v_pk_sub_i16 v0, s0, s1 clamp 2837; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2838; GFX10-NEXT: ; return to shader part epilog 2839 %result = call <2 x 
i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) 2840 %cast = bitcast <2 x i16> %result to i32 2841 ret i32 %cast 2842} 2843 2844define amdgpu_ps float @ssubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) { 2845; GFX6-LABEL: ssubsat_v2i16_sv: 2846; GFX6: ; %bb.0: 2847; GFX6-NEXT: s_lshl_b32 s0, s0, 16 2848; GFX6-NEXT: s_brev_b32 s2, -2 2849; GFX6-NEXT: s_max_i32 s4, s0, -1 2850; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2851; GFX6-NEXT: s_brev_b32 s3, 1 2852; GFX6-NEXT: s_sub_i32 s4, s4, s2 2853; GFX6-NEXT: s_min_i32 s5, s0, -1 2854; GFX6-NEXT: s_sub_i32 s5, s5, s3 2855; GFX6-NEXT: v_max_i32_e32 v0, s4, v0 2856; GFX6-NEXT: v_min_i32_e32 v0, s5, v0 2857; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 2858; GFX6-NEXT: s_lshl_b32 s0, s1, 16 2859; GFX6-NEXT: s_max_i32 s1, s0, -1 2860; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2861; GFX6-NEXT: s_sub_i32 s1, s1, s2 2862; GFX6-NEXT: s_min_i32 s2, s0, -1 2863; GFX6-NEXT: s_sub_i32 s2, s2, s3 2864; GFX6-NEXT: v_max_i32_e32 v1, s1, v1 2865; GFX6-NEXT: v_min_i32_e32 v1, s2, v1 2866; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s0, v1 2867; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1 2868; GFX6-NEXT: s_mov_b32 s0, 0xffff 2869; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0 2870; GFX6-NEXT: v_and_b32_e32 v1, s0, v1 2871; GFX6-NEXT: v_and_b32_e32 v0, s0, v0 2872; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2873; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 2874; GFX6-NEXT: ; return to shader part epilog 2875; 2876; GFX8-LABEL: ssubsat_v2i16_sv: 2877; GFX8: ; %bb.0: 2878; GFX8-NEXT: s_sext_i32_i16 s4, s0 2879; GFX8-NEXT: s_sext_i32_i16 s5, -1 2880; GFX8-NEXT: s_movk_i32 s2, 0x7fff 2881; GFX8-NEXT: s_max_i32 s6, s4, s5 2882; GFX8-NEXT: s_movk_i32 s3, 0x8000 2883; GFX8-NEXT: s_sub_i32 s6, s6, s2 2884; GFX8-NEXT: s_min_i32 s4, s4, s5 2885; GFX8-NEXT: s_lshr_b32 s1, s0, 16 2886; GFX8-NEXT: s_sub_i32 s4, s4, s3 2887; GFX8-NEXT: v_max_i16_e32 v1, s6, v0 2888; GFX8-NEXT: v_min_i16_e32 v1, s4, v1 2889; GFX8-NEXT: s_sext_i32_i16 s4, s1 2890; GFX8-NEXT: s_max_i32 s6, s4, 
s5 2891; GFX8-NEXT: s_sub_i32 s2, s6, s2 2892; GFX8-NEXT: s_min_i32 s4, s4, s5 2893; GFX8-NEXT: v_mov_b32_e32 v2, s2 2894; GFX8-NEXT: s_sub_i32 s3, s4, s3 2895; GFX8-NEXT: v_max_i16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 2896; GFX8-NEXT: v_min_i16_e32 v0, s3, v0 2897; GFX8-NEXT: v_mov_b32_e32 v2, s1 2898; GFX8-NEXT: v_sub_u16_e32 v1, s0, v1 2899; GFX8-NEXT: v_sub_u16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2900; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 2901; GFX8-NEXT: ; return to shader part epilog 2902; 2903; GFX9-LABEL: ssubsat_v2i16_sv: 2904; GFX9: ; %bb.0: 2905; GFX9-NEXT: v_pk_sub_i16 v0, s0, v0 clamp 2906; GFX9-NEXT: ; return to shader part epilog 2907; 2908; GFX10-LABEL: ssubsat_v2i16_sv: 2909; GFX10: ; %bb.0: 2910; GFX10-NEXT: v_pk_sub_i16 v0, s0, v0 clamp 2911; GFX10-NEXT: ; return to shader part epilog 2912 %result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) 2913 %cast = bitcast <2 x i16> %result to float 2914 ret float %cast 2915} 2916 2917define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) { 2918; GFX6-LABEL: ssubsat_v2i16_vs: 2919; GFX6: ; %bb.0: 2920; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2921; GFX6-NEXT: s_brev_b32 s2, -2 2922; GFX6-NEXT: v_max_i32_e32 v2, -1, v0 2923; GFX6-NEXT: s_lshl_b32 s0, s0, 16 2924; GFX6-NEXT: s_brev_b32 s3, 1 2925; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s2, v2 2926; GFX6-NEXT: v_min_i32_e32 v3, -1, v0 2927; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s3, v3 2928; GFX6-NEXT: v_max_i32_e32 v2, s0, v2 2929; GFX6-NEXT: v_min_i32_e32 v2, v2, v3 2930; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2931; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 2932; GFX6-NEXT: v_max_i32_e32 v2, -1, v1 2933; GFX6-NEXT: s_lshl_b32 s0, s1, 16 2934; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, s2, v2 2935; GFX6-NEXT: v_min_i32_e32 v3, -1, v1 2936; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, s3, v3 2937; GFX6-NEXT: v_max_i32_e32 v2, s0, v2 2938; 
GFX6-NEXT: v_min_i32_e32 v2, v2, v3 2939; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 2940; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1 2941; GFX6-NEXT: s_mov_b32 s0, 0xffff 2942; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0 2943; GFX6-NEXT: v_and_b32_e32 v1, s0, v1 2944; GFX6-NEXT: v_and_b32_e32 v0, s0, v0 2945; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2946; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 2947; GFX6-NEXT: ; return to shader part epilog 2948; 2949; GFX8-LABEL: ssubsat_v2i16_vs: 2950; GFX8: ; %bb.0: 2951; GFX8-NEXT: s_movk_i32 s2, 0x7fff 2952; GFX8-NEXT: v_max_i16_e32 v2, -1, v0 2953; GFX8-NEXT: s_movk_i32 s3, 0x8000 2954; GFX8-NEXT: v_subrev_u16_e32 v2, s2, v2 2955; GFX8-NEXT: v_min_i16_e32 v3, -1, v0 2956; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0 2957; GFX8-NEXT: v_subrev_u16_e32 v3, s3, v3 2958; GFX8-NEXT: v_max_i16_e32 v2, s0, v2 2959; GFX8-NEXT: v_min_i16_e32 v2, v2, v3 2960; GFX8-NEXT: v_max_i16_e32 v3, -1, v1 2961; GFX8-NEXT: s_lshr_b32 s1, s0, 16 2962; GFX8-NEXT: v_subrev_u16_e32 v3, s2, v3 2963; GFX8-NEXT: v_min_i16_e32 v4, -1, v1 2964; GFX8-NEXT: v_subrev_u16_e32 v4, s3, v4 2965; GFX8-NEXT: v_max_i16_e32 v3, s1, v3 2966; GFX8-NEXT: v_min_i16_e32 v3, v3, v4 2967; GFX8-NEXT: v_sub_u16_e32 v0, v0, v2 2968; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 2969; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2970; GFX8-NEXT: ; return to shader part epilog 2971; 2972; GFX9-LABEL: ssubsat_v2i16_vs: 2973; GFX9: ; %bb.0: 2974; GFX9-NEXT: v_pk_sub_i16 v0, v0, s0 clamp 2975; GFX9-NEXT: ; return to shader part epilog 2976; 2977; GFX10-LABEL: ssubsat_v2i16_vs: 2978; GFX10: ; %bb.0: 2979; GFX10-NEXT: v_pk_sub_i16 v0, v0, s0 clamp 2980; GFX10-NEXT: ; return to shader part epilog 2981 %result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) 2982 %cast = bitcast <2 x i16> %result to float 2983 ret float %cast 2984} 2985 2986; FIXME: v3i16 insert/extract 2987; define <3 x i16> @v_ssubsat_v3i16(<3 x i16> %lhs, <3 x i16> 
%rhs) { 2988; %result = call <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs) 2989; ret <3 x i16> %result 2990; } 2991 2992; define amdgpu_ps <3 x i16> @s_ssubsat_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs) { 2993; %result = call <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs) 2994; ret <3 x i16> %result 2995; } 2996 2997define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { 2998; GFX6-LABEL: v_ssubsat_v4i16: 2999; GFX6: ; %bb.0: 3000; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3001; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3002; GFX6-NEXT: s_brev_b32 s4, -2 3003; GFX6-NEXT: v_max_i32_e32 v8, -1, v0 3004; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 3005; GFX6-NEXT: s_brev_b32 s5, 1 3006; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s4, v8 3007; GFX6-NEXT: v_min_i32_e32 v10, -1, v0 3008; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, s5, v10 3009; GFX6-NEXT: v_max_i32_e32 v4, v8, v4 3010; GFX6-NEXT: v_min_i32_e32 v4, v4, v10 3011; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3012; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 3013; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v5 3014; GFX6-NEXT: v_max_i32_e32 v5, -1, v1 3015; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, s4, v5 3016; GFX6-NEXT: v_min_i32_e32 v8, -1, v1 3017; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s5, v8 3018; GFX6-NEXT: v_max_i32_e32 v4, v5, v4 3019; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3020; GFX6-NEXT: v_bfrev_b32_e32 v9, -2 3021; GFX6-NEXT: v_min_i32_e32 v4, v4, v8 3022; GFX6-NEXT: v_max_i32_e32 v5, -1, v2 3023; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 3024; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v6 3025; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9 3026; GFX6-NEXT: v_min_i32_e32 v6, -1, v2 3027; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, s5, v6 3028; GFX6-NEXT: v_max_i32_e32 v4, v5, v4 3029; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3030; GFX6-NEXT: v_min_i32_e32 v4, v4, v6 3031; GFX6-NEXT: v_max_i32_e32 v5, -1, v3 3032; GFX6-NEXT: v_bfrev_b32_e32 v11, 1 3033; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 
3034; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v7 3035; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v9 3036; GFX6-NEXT: v_min_i32_e32 v6, -1, v3 3037; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v11 3038; GFX6-NEXT: v_max_i32_e32 v4, v5, v4 3039; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1 3040; GFX6-NEXT: v_min_i32_e32 v4, v4, v6 3041; GFX6-NEXT: s_mov_b32 s4, 0xffff 3042; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0 3043; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v4 3044; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 3045; GFX6-NEXT: v_ashrrev_i32_e32 v2, 16, v2 3046; GFX6-NEXT: v_ashrrev_i32_e32 v3, 16, v3 3047; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 3048; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3049; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3050; GFX6-NEXT: v_and_b32_e32 v1, s4, v2 3051; GFX6-NEXT: v_and_b32_e32 v2, s4, v3 3052; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3053; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 3054; GFX6-NEXT: s_setpc_b64 s[30:31] 3055; 3056; GFX8-LABEL: v_ssubsat_v4i16: 3057; GFX8: ; %bb.0: 3058; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3059; GFX8-NEXT: s_movk_i32 s4, 0x7fff 3060; GFX8-NEXT: v_max_i16_e32 v6, -1, v0 3061; GFX8-NEXT: s_movk_i32 s5, 0x8000 3062; GFX8-NEXT: v_subrev_u16_e32 v6, s4, v6 3063; GFX8-NEXT: v_min_i16_e32 v7, -1, v0 3064; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v0 3065; GFX8-NEXT: v_subrev_u16_e32 v7, s5, v7 3066; GFX8-NEXT: v_max_i16_e32 v6, v6, v2 3067; GFX8-NEXT: v_min_i16_e32 v6, v6, v7 3068; GFX8-NEXT: v_max_i16_e32 v7, -1, v4 3069; GFX8-NEXT: v_subrev_u16_e32 v7, s4, v7 3070; GFX8-NEXT: v_min_i16_e32 v8, -1, v4 3071; GFX8-NEXT: v_subrev_u16_e32 v8, s5, v8 3072; GFX8-NEXT: v_max_i16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3073; GFX8-NEXT: v_max_i16_e32 v7, -1, v1 3074; GFX8-NEXT: v_min_i16_e32 v2, v2, v8 3075; GFX8-NEXT: v_subrev_u16_e32 v7, s4, v7 3076; GFX8-NEXT: v_min_i16_e32 v8, -1, v1 3077; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v1 3078; GFX8-NEXT: v_subrev_u16_e32 v8, s5, v8 3079; GFX8-NEXT: v_max_i16_e32 v7, 
v7, v3 3080; GFX8-NEXT: v_min_i16_e32 v7, v7, v8 3081; GFX8-NEXT: v_max_i16_e32 v8, -1, v5 3082; GFX8-NEXT: v_subrev_u16_e32 v8, s4, v8 3083; GFX8-NEXT: v_min_i16_e32 v9, -1, v5 3084; GFX8-NEXT: v_subrev_u16_e32 v9, s5, v9 3085; GFX8-NEXT: v_max_i16_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3086; GFX8-NEXT: v_min_i16_e32 v3, v3, v9 3087; GFX8-NEXT: v_sub_u16_e32 v0, v0, v6 3088; GFX8-NEXT: v_sub_u16_sdwa v2, v4, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 3089; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 3090; GFX8-NEXT: v_sub_u16_e32 v1, v1, v7 3091; GFX8-NEXT: v_sub_u16_sdwa v2, v5, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 3092; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 3093; GFX8-NEXT: s_setpc_b64 s[30:31] 3094; 3095; GFX9-LABEL: v_ssubsat_v4i16: 3096; GFX9: ; %bb.0: 3097; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3098; GFX9-NEXT: v_pk_sub_i16 v0, v0, v2 clamp 3099; GFX9-NEXT: v_pk_sub_i16 v1, v1, v3 clamp 3100; GFX9-NEXT: s_setpc_b64 s[30:31] 3101; 3102; GFX10-LABEL: v_ssubsat_v4i16: 3103; GFX10: ; %bb.0: 3104; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3105; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3106; GFX10-NEXT: v_pk_sub_i16 v0, v0, v2 clamp 3107; GFX10-NEXT: v_pk_sub_i16 v1, v1, v3 clamp 3108; GFX10-NEXT: s_setpc_b64 s[30:31] 3109 %result = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) 3110 %cast = bitcast <4 x i16> %result to <2 x float> 3111 ret <2 x float> %cast 3112} 3113 3114define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs) { 3115; GFX6-LABEL: s_ssubsat_v4i16: 3116; GFX6: ; %bb.0: 3117; GFX6-NEXT: s_lshl_b32 s0, s0, 16 3118; GFX6-NEXT: s_brev_b32 s8, -2 3119; GFX6-NEXT: s_max_i32 s10, s0, -1 3120; GFX6-NEXT: s_lshl_b32 s4, s4, 16 3121; GFX6-NEXT: s_brev_b32 s9, 1 3122; GFX6-NEXT: s_sub_i32 s10, s10, s8 3123; GFX6-NEXT: s_min_i32 s11, s0, -1 3124; GFX6-NEXT: s_sub_i32 s11, s11, s9 3125; GFX6-NEXT: 
s_max_i32 s4, s10, s4 3126; GFX6-NEXT: s_min_i32 s4, s4, s11 3127; GFX6-NEXT: s_lshl_b32 s1, s1, 16 3128; GFX6-NEXT: s_sub_i32 s0, s0, s4 3129; GFX6-NEXT: s_lshl_b32 s4, s5, 16 3130; GFX6-NEXT: s_max_i32 s5, s1, -1 3131; GFX6-NEXT: s_sub_i32 s5, s5, s8 3132; GFX6-NEXT: s_min_i32 s10, s1, -1 3133; GFX6-NEXT: s_sub_i32 s10, s10, s9 3134; GFX6-NEXT: s_max_i32 s4, s5, s4 3135; GFX6-NEXT: s_lshl_b32 s2, s2, 16 3136; GFX6-NEXT: s_min_i32 s4, s4, s10 3137; GFX6-NEXT: s_max_i32 s5, s2, -1 3138; GFX6-NEXT: s_sub_i32 s1, s1, s4 3139; GFX6-NEXT: s_lshl_b32 s4, s6, 16 3140; GFX6-NEXT: s_sub_i32 s5, s5, s8 3141; GFX6-NEXT: s_min_i32 s6, s2, -1 3142; GFX6-NEXT: s_sub_i32 s6, s6, s9 3143; GFX6-NEXT: s_max_i32 s4, s5, s4 3144; GFX6-NEXT: s_lshl_b32 s3, s3, 16 3145; GFX6-NEXT: s_min_i32 s4, s4, s6 3146; GFX6-NEXT: s_max_i32 s5, s3, -1 3147; GFX6-NEXT: s_sub_i32 s2, s2, s4 3148; GFX6-NEXT: s_lshl_b32 s4, s7, 16 3149; GFX6-NEXT: s_sub_i32 s5, s5, s8 3150; GFX6-NEXT: s_min_i32 s6, s3, -1 3151; GFX6-NEXT: s_sub_i32 s6, s6, s9 3152; GFX6-NEXT: s_max_i32 s4, s5, s4 3153; GFX6-NEXT: s_min_i32 s4, s4, s6 3154; GFX6-NEXT: s_ashr_i32 s1, s1, 16 3155; GFX6-NEXT: s_sub_i32 s3, s3, s4 3156; GFX6-NEXT: s_mov_b32 s4, 0xffff 3157; GFX6-NEXT: s_ashr_i32 s0, s0, 16 3158; GFX6-NEXT: s_and_b32 s1, s1, s4 3159; GFX6-NEXT: s_ashr_i32 s2, s2, 16 3160; GFX6-NEXT: s_ashr_i32 s3, s3, 16 3161; GFX6-NEXT: s_and_b32 s0, s0, s4 3162; GFX6-NEXT: s_lshl_b32 s1, s1, 16 3163; GFX6-NEXT: s_or_b32 s0, s0, s1 3164; GFX6-NEXT: s_and_b32 s1, s2, s4 3165; GFX6-NEXT: s_and_b32 s2, s3, s4 3166; GFX6-NEXT: s_lshl_b32 s2, s2, 16 3167; GFX6-NEXT: s_or_b32 s1, s1, s2 3168; GFX6-NEXT: ; return to shader part epilog 3169; 3170; GFX8-LABEL: s_ssubsat_v4i16: 3171; GFX8: ; %bb.0: 3172; GFX8-NEXT: s_sext_i32_i16 s10, s0 3173; GFX8-NEXT: s_sext_i32_i16 s11, -1 3174; GFX8-NEXT: s_movk_i32 s8, 0x7fff 3175; GFX8-NEXT: s_max_i32 s12, s10, s11 3176; GFX8-NEXT: s_sub_i32 s12, s12, s8 3177; GFX8-NEXT: s_lshr_b32 s6, s2, 16 3178; GFX8-NEXT: 
s_movk_i32 s9, 0x8000 3179; GFX8-NEXT: s_min_i32 s10, s10, s11 3180; GFX8-NEXT: s_sext_i32_i16 s12, s12 3181; GFX8-NEXT: s_sext_i32_i16 s2, s2 3182; GFX8-NEXT: s_sub_i32 s10, s10, s9 3183; GFX8-NEXT: s_max_i32 s2, s12, s2 3184; GFX8-NEXT: s_sext_i32_i16 s2, s2 3185; GFX8-NEXT: s_sext_i32_i16 s10, s10 3186; GFX8-NEXT: s_lshr_b32 s4, s0, 16 3187; GFX8-NEXT: s_min_i32 s2, s2, s10 3188; GFX8-NEXT: s_sub_i32 s0, s0, s2 3189; GFX8-NEXT: s_sext_i32_i16 s2, s4 3190; GFX8-NEXT: s_max_i32 s10, s2, s11 3191; GFX8-NEXT: s_sub_i32 s10, s10, s8 3192; GFX8-NEXT: s_min_i32 s2, s2, s11 3193; GFX8-NEXT: s_sext_i32_i16 s10, s10 3194; GFX8-NEXT: s_sext_i32_i16 s6, s6 3195; GFX8-NEXT: s_sub_i32 s2, s2, s9 3196; GFX8-NEXT: s_max_i32 s6, s10, s6 3197; GFX8-NEXT: s_sext_i32_i16 s6, s6 3198; GFX8-NEXT: s_sext_i32_i16 s2, s2 3199; GFX8-NEXT: s_min_i32 s2, s6, s2 3200; GFX8-NEXT: s_sub_i32 s2, s4, s2 3201; GFX8-NEXT: s_sext_i32_i16 s4, s1 3202; GFX8-NEXT: s_max_i32 s6, s4, s11 3203; GFX8-NEXT: s_sub_i32 s6, s6, s8 3204; GFX8-NEXT: s_lshr_b32 s7, s3, 16 3205; GFX8-NEXT: s_min_i32 s4, s4, s11 3206; GFX8-NEXT: s_sext_i32_i16 s6, s6 3207; GFX8-NEXT: s_sext_i32_i16 s3, s3 3208; GFX8-NEXT: s_sub_i32 s4, s4, s9 3209; GFX8-NEXT: s_max_i32 s3, s6, s3 3210; GFX8-NEXT: s_sext_i32_i16 s3, s3 3211; GFX8-NEXT: s_sext_i32_i16 s4, s4 3212; GFX8-NEXT: s_lshr_b32 s5, s1, 16 3213; GFX8-NEXT: s_min_i32 s3, s3, s4 3214; GFX8-NEXT: s_sub_i32 s1, s1, s3 3215; GFX8-NEXT: s_sext_i32_i16 s3, s5 3216; GFX8-NEXT: s_max_i32 s4, s3, s11 3217; GFX8-NEXT: s_sub_i32 s4, s4, s8 3218; GFX8-NEXT: s_min_i32 s3, s3, s11 3219; GFX8-NEXT: s_sext_i32_i16 s4, s4 3220; GFX8-NEXT: s_sext_i32_i16 s6, s7 3221; GFX8-NEXT: s_sub_i32 s3, s3, s9 3222; GFX8-NEXT: s_max_i32 s4, s4, s6 3223; GFX8-NEXT: s_sext_i32_i16 s4, s4 3224; GFX8-NEXT: s_sext_i32_i16 s3, s3 3225; GFX8-NEXT: s_min_i32 s3, s4, s3 3226; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3227; GFX8-NEXT: s_sub_i32 s3, s5, s3 3228; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 3229; GFX8-NEXT: 
s_lshl_b32 s2, s2, 16 3230; GFX8-NEXT: s_or_b32 s0, s0, s2 3231; GFX8-NEXT: s_bfe_u32 s2, s3, 0x100000 3232; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3233; GFX8-NEXT: s_lshl_b32 s2, s2, 16 3234; GFX8-NEXT: s_or_b32 s1, s1, s2 3235; GFX8-NEXT: ; return to shader part epilog 3236; 3237; GFX9-LABEL: s_ssubsat_v4i16: 3238; GFX9: ; %bb.0: 3239; GFX9-NEXT: v_mov_b32_e32 v0, s2 3240; GFX9-NEXT: v_mov_b32_e32 v1, s3 3241; GFX9-NEXT: v_pk_sub_i16 v0, s0, v0 clamp 3242; GFX9-NEXT: v_pk_sub_i16 v1, s1, v1 clamp 3243; GFX9-NEXT: v_readfirstlane_b32 s0, v0 3244; GFX9-NEXT: v_readfirstlane_b32 s1, v1 3245; GFX9-NEXT: ; return to shader part epilog 3246; 3247; GFX10-LABEL: s_ssubsat_v4i16: 3248; GFX10: ; %bb.0: 3249; GFX10-NEXT: v_pk_sub_i16 v0, s0, s2 clamp 3250; GFX10-NEXT: v_pk_sub_i16 v1, s1, s3 clamp 3251; GFX10-NEXT: v_readfirstlane_b32 s0, v0 3252; GFX10-NEXT: v_readfirstlane_b32 s1, v1 3253; GFX10-NEXT: ; return to shader part epilog 3254 %result = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) 3255 %cast = bitcast <4 x i16> %result to <2 x i32> 3256 ret <2 x i32> %cast 3257} 3258 3259; FIXME: the <5 x i16> tests below are disabled (presumably the same odd-length i16 vector insert/extract problem as v3i16 above; confirm before re-enabling) 3260; define <5 x i16> @v_ssubsat_v5i16(<5 x i16> %lhs, <5 x i16> %rhs) { 3261; %result = call <5 x i16> @llvm.ssub.sat.v5i16(<5 x i16> %lhs, <5 x i16> %rhs) 3262; ret <5 x i16> %result 3263; } 3264 3265; define amdgpu_ps <5 x i16> @s_ssubsat_v5i16(<5 x i16> inreg %lhs, <5 x i16> inreg %rhs) { 3266; %result = call <5 x i16> @llvm.ssub.sat.v5i16(<5 x i16> %lhs, <5 x i16> %rhs) 3267; ret <5 x i16> %result 3268; } 3269 3270define <3 x float> @v_ssubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) { 3271; GFX6-LABEL: v_ssubsat_v6i16: 3272; GFX6: ; %bb.0: 3273; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3274; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3275; GFX6-NEXT: s_brev_b32 s4, -2 3276; GFX6-NEXT: v_max_i32_e32 v12, -1, v0 3277; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v6 3278; GFX6-NEXT: s_brev_b32 s5, 1 3279; GFX6-NEXT: v_subrev_i32_e32 v12, vcc, s4, v12 3280; 
GFX6-NEXT: v_min_i32_e32 v14, -1, v0 3281; GFX6-NEXT: v_subrev_i32_e32 v14, vcc, s5, v14 3282; GFX6-NEXT: v_max_i32_e32 v6, v12, v6 3283; GFX6-NEXT: v_min_i32_e32 v6, v6, v14 3284; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3285; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 3286; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v7 3287; GFX6-NEXT: v_max_i32_e32 v7, -1, v1 3288; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, s4, v7 3289; GFX6-NEXT: v_min_i32_e32 v12, -1, v1 3290; GFX6-NEXT: v_subrev_i32_e32 v12, vcc, s5, v12 3291; GFX6-NEXT: v_max_i32_e32 v6, v7, v6 3292; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3293; GFX6-NEXT: v_bfrev_b32_e32 v13, -2 3294; GFX6-NEXT: v_min_i32_e32 v6, v6, v12 3295; GFX6-NEXT: v_max_i32_e32 v7, -1, v2 3296; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v6 3297; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v8 3298; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13 3299; GFX6-NEXT: v_min_i32_e32 v8, -1, v2 3300; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, s5, v8 3301; GFX6-NEXT: v_max_i32_e32 v6, v7, v6 3302; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3303; GFX6-NEXT: v_min_i32_e32 v6, v6, v8 3304; GFX6-NEXT: v_max_i32_e32 v7, -1, v3 3305; GFX6-NEXT: v_bfrev_b32_e32 v15, 1 3306; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 3307; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v9 3308; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13 3309; GFX6-NEXT: v_min_i32_e32 v8, -1, v3 3310; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v15 3311; GFX6-NEXT: v_max_i32_e32 v6, v7, v6 3312; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 3313; GFX6-NEXT: v_min_i32_e32 v6, v6, v8 3314; GFX6-NEXT: v_max_i32_e32 v7, -1, v4 3315; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v6 3316; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v10 3317; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13 3318; GFX6-NEXT: v_min_i32_e32 v8, -1, v4 3319; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v15 3320; GFX6-NEXT: v_max_i32_e32 v6, v7, v6 3321; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5 3322; GFX6-NEXT: v_min_i32_e32 v6, v6, v8 3323; GFX6-NEXT: v_max_i32_e32 v7, -1, v5 3324; GFX6-NEXT: v_sub_i32_e32 
v4, vcc, v4, v6 3325; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v11 3326; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v13 3327; GFX6-NEXT: v_min_i32_e32 v8, -1, v5 3328; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1 3329; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v8, v15 3330; GFX6-NEXT: v_max_i32_e32 v6, v7, v6 3331; GFX6-NEXT: s_mov_b32 s4, 0xffff 3332; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0 3333; GFX6-NEXT: v_min_i32_e32 v6, v6, v8 3334; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 3335; GFX6-NEXT: v_ashrrev_i32_e32 v2, 16, v2 3336; GFX6-NEXT: v_ashrrev_i32_e32 v3, 16, v3 3337; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v6 3338; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 3339; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3340; GFX6-NEXT: v_ashrrev_i32_e32 v5, 16, v5 3341; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3342; GFX6-NEXT: v_and_b32_e32 v1, s4, v2 3343; GFX6-NEXT: v_and_b32_e32 v2, s4, v3 3344; GFX6-NEXT: v_ashrrev_i32_e32 v4, 16, v4 3345; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3346; GFX6-NEXT: v_and_b32_e32 v3, s4, v5 3347; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 3348; GFX6-NEXT: v_and_b32_e32 v2, s4, v4 3349; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3350; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 3351; GFX6-NEXT: s_setpc_b64 s[30:31] 3352; 3353; GFX8-LABEL: v_ssubsat_v6i16: 3354; GFX8: ; %bb.0: 3355; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3356; GFX8-NEXT: s_movk_i32 s4, 0x7fff 3357; GFX8-NEXT: v_max_i16_e32 v9, -1, v0 3358; GFX8-NEXT: s_movk_i32 s5, 0x8000 3359; GFX8-NEXT: v_subrev_u16_e32 v9, s4, v9 3360; GFX8-NEXT: v_min_i16_e32 v11, -1, v0 3361; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0 3362; GFX8-NEXT: v_subrev_u16_e32 v11, s5, v11 3363; GFX8-NEXT: v_max_i16_e32 v9, v9, v3 3364; GFX8-NEXT: v_min_i16_e32 v9, v9, v11 3365; GFX8-NEXT: v_max_i16_e32 v11, -1, v6 3366; GFX8-NEXT: v_subrev_u16_e32 v11, s4, v11 3367; GFX8-NEXT: v_min_i16_e32 v13, -1, v6 3368; GFX8-NEXT: v_subrev_u16_e32 v13, s5, v13 3369; GFX8-NEXT: v_max_i16_sdwa v3, v11, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3370; 
GFX8-NEXT: v_max_i16_e32 v11, -1, v1 3371; GFX8-NEXT: v_min_i16_e32 v3, v3, v13 3372; GFX8-NEXT: v_subrev_u16_e32 v11, s4, v11 3373; GFX8-NEXT: v_min_i16_e32 v13, -1, v1 3374; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v1 3375; GFX8-NEXT: v_subrev_u16_e32 v13, s5, v13 3376; GFX8-NEXT: v_max_i16_e32 v11, v11, v4 3377; GFX8-NEXT: v_min_i16_e32 v11, v11, v13 3378; GFX8-NEXT: v_max_i16_e32 v13, -1, v7 3379; GFX8-NEXT: v_subrev_u16_e32 v13, s4, v13 3380; GFX8-NEXT: v_min_i16_e32 v14, -1, v7 3381; GFX8-NEXT: v_mov_b32_e32 v10, 0x7fff 3382; GFX8-NEXT: v_subrev_u16_e32 v14, s5, v14 3383; GFX8-NEXT: v_max_i16_sdwa v4, v13, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3384; GFX8-NEXT: v_max_i16_e32 v13, -1, v2 3385; GFX8-NEXT: v_mov_b32_e32 v12, 0xffff8000 3386; GFX8-NEXT: v_min_i16_e32 v4, v4, v14 3387; GFX8-NEXT: v_sub_u16_e32 v13, v13, v10 3388; GFX8-NEXT: v_min_i16_e32 v14, -1, v2 3389; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v2 3390; GFX8-NEXT: v_sub_u16_e32 v14, v14, v12 3391; GFX8-NEXT: v_max_i16_e32 v13, v13, v5 3392; GFX8-NEXT: v_min_i16_e32 v13, v13, v14 3393; GFX8-NEXT: v_max_i16_e32 v14, -1, v8 3394; GFX8-NEXT: v_sub_u16_e32 v10, v14, v10 3395; GFX8-NEXT: v_min_i16_e32 v14, -1, v8 3396; GFX8-NEXT: v_sub_u16_e32 v12, v14, v12 3397; GFX8-NEXT: v_max_i16_sdwa v5, v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3398; GFX8-NEXT: v_sub_u16_e32 v0, v0, v9 3399; GFX8-NEXT: v_sub_u16_sdwa v3, v6, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 3400; GFX8-NEXT: v_min_i16_e32 v5, v5, v12 3401; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 3402; GFX8-NEXT: v_sub_u16_e32 v1, v1, v11 3403; GFX8-NEXT: v_sub_u16_sdwa v3, v7, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 3404; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 3405; GFX8-NEXT: v_sub_u16_e32 v2, v2, v13 3406; GFX8-NEXT: v_sub_u16_sdwa v3, v8, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 3407; GFX8-NEXT: v_or_b32_e32 v2, v2, v3 
3408; GFX8-NEXT: s_setpc_b64 s[30:31] 3409; 3410; GFX9-LABEL: v_ssubsat_v6i16: 3411; GFX9: ; %bb.0: 3412; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3413; GFX9-NEXT: v_pk_sub_i16 v0, v0, v3 clamp 3414; GFX9-NEXT: v_pk_sub_i16 v1, v1, v4 clamp 3415; GFX9-NEXT: v_pk_sub_i16 v2, v2, v5 clamp 3416; GFX9-NEXT: s_setpc_b64 s[30:31] 3417; 3418; GFX10-LABEL: v_ssubsat_v6i16: 3419; GFX10: ; %bb.0: 3420; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3421; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3422; GFX10-NEXT: v_pk_sub_i16 v0, v0, v3 clamp 3423; GFX10-NEXT: v_pk_sub_i16 v1, v1, v4 clamp 3424; GFX10-NEXT: v_pk_sub_i16 v2, v2, v5 clamp 3425; GFX10-NEXT: s_setpc_b64 s[30:31] 3426 %result = call <6 x i16> @llvm.ssub.sat.v6i16(<6 x i16> %lhs, <6 x i16> %rhs) 3427 %cast = bitcast <6 x i16> %result to <3 x float> 3428 ret <3 x float> %cast 3429} 3430 3431define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inreg %rhs) { 3432; GFX6-LABEL: s_ssubsat_v6i16: 3433; GFX6: ; %bb.0: 3434; GFX6-NEXT: s_lshl_b32 s0, s0, 16 3435; GFX6-NEXT: s_brev_b32 s12, -2 3436; GFX6-NEXT: s_max_i32 s14, s0, -1 3437; GFX6-NEXT: s_lshl_b32 s6, s6, 16 3438; GFX6-NEXT: s_brev_b32 s13, 1 3439; GFX6-NEXT: s_sub_i32 s14, s14, s12 3440; GFX6-NEXT: s_min_i32 s15, s0, -1 3441; GFX6-NEXT: s_sub_i32 s15, s15, s13 3442; GFX6-NEXT: s_max_i32 s6, s14, s6 3443; GFX6-NEXT: s_min_i32 s6, s6, s15 3444; GFX6-NEXT: s_lshl_b32 s1, s1, 16 3445; GFX6-NEXT: s_sub_i32 s0, s0, s6 3446; GFX6-NEXT: s_lshl_b32 s6, s7, 16 3447; GFX6-NEXT: s_max_i32 s7, s1, -1 3448; GFX6-NEXT: s_sub_i32 s7, s7, s12 3449; GFX6-NEXT: s_min_i32 s14, s1, -1 3450; GFX6-NEXT: s_sub_i32 s14, s14, s13 3451; GFX6-NEXT: s_max_i32 s6, s7, s6 3452; GFX6-NEXT: s_lshl_b32 s2, s2, 16 3453; GFX6-NEXT: s_min_i32 s6, s6, s14 3454; GFX6-NEXT: s_max_i32 s7, s2, -1 3455; GFX6-NEXT: s_sub_i32 s1, s1, s6 3456; GFX6-NEXT: s_lshl_b32 s6, s8, 16 3457; GFX6-NEXT: s_sub_i32 s7, s7, s12 3458; GFX6-NEXT: s_min_i32 s8, s2, -1 3459; GFX6-NEXT: 
s_sub_i32 s8, s8, s13 3460; GFX6-NEXT: s_max_i32 s6, s7, s6 3461; GFX6-NEXT: s_lshl_b32 s3, s3, 16 3462; GFX6-NEXT: s_min_i32 s6, s6, s8 3463; GFX6-NEXT: s_max_i32 s7, s3, -1 3464; GFX6-NEXT: s_sub_i32 s2, s2, s6 3465; GFX6-NEXT: s_lshl_b32 s6, s9, 16 3466; GFX6-NEXT: s_sub_i32 s7, s7, s12 3467; GFX6-NEXT: s_min_i32 s8, s3, -1 3468; GFX6-NEXT: s_sub_i32 s8, s8, s13 3469; GFX6-NEXT: s_max_i32 s6, s7, s6 3470; GFX6-NEXT: s_lshl_b32 s4, s4, 16 3471; GFX6-NEXT: s_min_i32 s6, s6, s8 3472; GFX6-NEXT: s_max_i32 s7, s4, -1 3473; GFX6-NEXT: s_sub_i32 s3, s3, s6 3474; GFX6-NEXT: s_lshl_b32 s6, s10, 16 3475; GFX6-NEXT: s_sub_i32 s7, s7, s12 3476; GFX6-NEXT: s_min_i32 s8, s4, -1 3477; GFX6-NEXT: s_sub_i32 s8, s8, s13 3478; GFX6-NEXT: s_max_i32 s6, s7, s6 3479; GFX6-NEXT: s_lshl_b32 s5, s5, 16 3480; GFX6-NEXT: s_min_i32 s6, s6, s8 3481; GFX6-NEXT: s_max_i32 s7, s5, -1 3482; GFX6-NEXT: s_sub_i32 s4, s4, s6 3483; GFX6-NEXT: s_lshl_b32 s6, s11, 16 3484; GFX6-NEXT: s_sub_i32 s7, s7, s12 3485; GFX6-NEXT: s_min_i32 s8, s5, -1 3486; GFX6-NEXT: s_sub_i32 s8, s8, s13 3487; GFX6-NEXT: s_max_i32 s6, s7, s6 3488; GFX6-NEXT: s_min_i32 s6, s6, s8 3489; GFX6-NEXT: s_ashr_i32 s1, s1, 16 3490; GFX6-NEXT: s_sub_i32 s5, s5, s6 3491; GFX6-NEXT: s_mov_b32 s6, 0xffff 3492; GFX6-NEXT: s_ashr_i32 s0, s0, 16 3493; GFX6-NEXT: s_and_b32 s1, s1, s6 3494; GFX6-NEXT: s_ashr_i32 s2, s2, 16 3495; GFX6-NEXT: s_ashr_i32 s3, s3, 16 3496; GFX6-NEXT: s_and_b32 s0, s0, s6 3497; GFX6-NEXT: s_lshl_b32 s1, s1, 16 3498; GFX6-NEXT: s_ashr_i32 s5, s5, 16 3499; GFX6-NEXT: s_or_b32 s0, s0, s1 3500; GFX6-NEXT: s_and_b32 s1, s2, s6 3501; GFX6-NEXT: s_and_b32 s2, s3, s6 3502; GFX6-NEXT: s_ashr_i32 s4, s4, 16 3503; GFX6-NEXT: s_lshl_b32 s2, s2, 16 3504; GFX6-NEXT: s_and_b32 s3, s5, s6 3505; GFX6-NEXT: s_or_b32 s1, s1, s2 3506; GFX6-NEXT: s_and_b32 s2, s4, s6 3507; GFX6-NEXT: s_lshl_b32 s3, s3, 16 3508; GFX6-NEXT: s_or_b32 s2, s2, s3 3509; GFX6-NEXT: ; return to shader part epilog 3510; 3511; GFX8-LABEL: s_ssubsat_v6i16: 3512; 
GFX8: ; %bb.0: 3513; GFX8-NEXT: s_sext_i32_i16 s14, s0 3514; GFX8-NEXT: s_sext_i32_i16 s15, -1 3515; GFX8-NEXT: s_movk_i32 s12, 0x7fff 3516; GFX8-NEXT: s_max_i32 s16, s14, s15 3517; GFX8-NEXT: s_sub_i32 s16, s16, s12 3518; GFX8-NEXT: s_lshr_b32 s9, s3, 16 3519; GFX8-NEXT: s_movk_i32 s13, 0x8000 3520; GFX8-NEXT: s_min_i32 s14, s14, s15 3521; GFX8-NEXT: s_sext_i32_i16 s16, s16 3522; GFX8-NEXT: s_sext_i32_i16 s3, s3 3523; GFX8-NEXT: s_sub_i32 s14, s14, s13 3524; GFX8-NEXT: s_max_i32 s3, s16, s3 3525; GFX8-NEXT: s_sext_i32_i16 s3, s3 3526; GFX8-NEXT: s_sext_i32_i16 s14, s14 3527; GFX8-NEXT: s_lshr_b32 s6, s0, 16 3528; GFX8-NEXT: s_min_i32 s3, s3, s14 3529; GFX8-NEXT: s_sub_i32 s0, s0, s3 3530; GFX8-NEXT: s_sext_i32_i16 s3, s6 3531; GFX8-NEXT: s_max_i32 s14, s3, s15 3532; GFX8-NEXT: s_sub_i32 s14, s14, s12 3533; GFX8-NEXT: s_min_i32 s3, s3, s15 3534; GFX8-NEXT: s_sext_i32_i16 s14, s14 3535; GFX8-NEXT: s_sext_i32_i16 s9, s9 3536; GFX8-NEXT: s_sub_i32 s3, s3, s13 3537; GFX8-NEXT: s_max_i32 s9, s14, s9 3538; GFX8-NEXT: s_sext_i32_i16 s9, s9 3539; GFX8-NEXT: s_sext_i32_i16 s3, s3 3540; GFX8-NEXT: s_min_i32 s3, s9, s3 3541; GFX8-NEXT: s_sub_i32 s3, s6, s3 3542; GFX8-NEXT: s_sext_i32_i16 s6, s1 3543; GFX8-NEXT: s_max_i32 s9, s6, s15 3544; GFX8-NEXT: s_sub_i32 s9, s9, s12 3545; GFX8-NEXT: s_lshr_b32 s10, s4, 16 3546; GFX8-NEXT: s_min_i32 s6, s6, s15 3547; GFX8-NEXT: s_sext_i32_i16 s9, s9 3548; GFX8-NEXT: s_sext_i32_i16 s4, s4 3549; GFX8-NEXT: s_sub_i32 s6, s6, s13 3550; GFX8-NEXT: s_max_i32 s4, s9, s4 3551; GFX8-NEXT: s_sext_i32_i16 s4, s4 3552; GFX8-NEXT: s_sext_i32_i16 s6, s6 3553; GFX8-NEXT: s_lshr_b32 s7, s1, 16 3554; GFX8-NEXT: s_min_i32 s4, s4, s6 3555; GFX8-NEXT: s_sub_i32 s1, s1, s4 3556; GFX8-NEXT: s_sext_i32_i16 s4, s7 3557; GFX8-NEXT: s_max_i32 s6, s4, s15 3558; GFX8-NEXT: s_sub_i32 s6, s6, s12 3559; GFX8-NEXT: s_min_i32 s4, s4, s15 3560; GFX8-NEXT: s_sext_i32_i16 s6, s6 3561; GFX8-NEXT: s_sext_i32_i16 s9, s10 3562; GFX8-NEXT: s_sub_i32 s4, s4, s13 3563; GFX8-NEXT: 
s_max_i32 s6, s6, s9 3564; GFX8-NEXT: s_sext_i32_i16 s6, s6 3565; GFX8-NEXT: s_sext_i32_i16 s4, s4 3566; GFX8-NEXT: s_min_i32 s4, s6, s4 3567; GFX8-NEXT: s_sext_i32_i16 s6, s2 3568; GFX8-NEXT: s_sub_i32 s4, s7, s4 3569; GFX8-NEXT: s_max_i32 s7, s6, s15 3570; GFX8-NEXT: s_sub_i32 s7, s7, s12 3571; GFX8-NEXT: s_lshr_b32 s11, s5, 16 3572; GFX8-NEXT: s_min_i32 s6, s6, s15 3573; GFX8-NEXT: s_sext_i32_i16 s7, s7 3574; GFX8-NEXT: s_sext_i32_i16 s5, s5 3575; GFX8-NEXT: s_sub_i32 s6, s6, s13 3576; GFX8-NEXT: s_max_i32 s5, s7, s5 3577; GFX8-NEXT: s_sext_i32_i16 s5, s5 3578; GFX8-NEXT: s_sext_i32_i16 s6, s6 3579; GFX8-NEXT: s_lshr_b32 s8, s2, 16 3580; GFX8-NEXT: s_min_i32 s5, s5, s6 3581; GFX8-NEXT: s_sub_i32 s2, s2, s5 3582; GFX8-NEXT: s_sext_i32_i16 s5, s8 3583; GFX8-NEXT: s_max_i32 s6, s5, s15 3584; GFX8-NEXT: s_sub_i32 s6, s6, s12 3585; GFX8-NEXT: s_min_i32 s5, s5, s15 3586; GFX8-NEXT: s_sext_i32_i16 s6, s6 3587; GFX8-NEXT: s_sext_i32_i16 s7, s11 3588; GFX8-NEXT: s_sub_i32 s5, s5, s13 3589; GFX8-NEXT: s_max_i32 s6, s6, s7 3590; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 3591; GFX8-NEXT: s_sext_i32_i16 s6, s6 3592; GFX8-NEXT: s_sext_i32_i16 s5, s5 3593; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 3594; GFX8-NEXT: s_lshl_b32 s3, s3, 16 3595; GFX8-NEXT: s_min_i32 s5, s6, s5 3596; GFX8-NEXT: s_or_b32 s0, s0, s3 3597; GFX8-NEXT: s_bfe_u32 s3, s4, 0x100000 3598; GFX8-NEXT: s_sub_i32 s5, s8, s5 3599; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3600; GFX8-NEXT: s_lshl_b32 s3, s3, 16 3601; GFX8-NEXT: s_or_b32 s1, s1, s3 3602; GFX8-NEXT: s_bfe_u32 s3, s5, 0x100000 3603; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3604; GFX8-NEXT: s_lshl_b32 s3, s3, 16 3605; GFX8-NEXT: s_or_b32 s2, s2, s3 3606; GFX8-NEXT: ; return to shader part epilog 3607; 3608; GFX9-LABEL: s_ssubsat_v6i16: 3609; GFX9: ; %bb.0: 3610; GFX9-NEXT: v_mov_b32_e32 v0, s3 3611; GFX9-NEXT: v_mov_b32_e32 v1, s4 3612; GFX9-NEXT: v_mov_b32_e32 v2, s5 3613; GFX9-NEXT: v_pk_sub_i16 v0, s0, v0 clamp 3614; GFX9-NEXT: v_pk_sub_i16 v1, s1, v1 clamp 
3615; GFX9-NEXT: v_pk_sub_i16 v2, s2, v2 clamp 3616; GFX9-NEXT: v_readfirstlane_b32 s0, v0 3617; GFX9-NEXT: v_readfirstlane_b32 s1, v1 3618; GFX9-NEXT: v_readfirstlane_b32 s2, v2 3619; GFX9-NEXT: ; return to shader part epilog 3620; 3621; GFX10-LABEL: s_ssubsat_v6i16: 3622; GFX10: ; %bb.0: 3623; GFX10-NEXT: v_pk_sub_i16 v0, s0, s3 clamp 3624; GFX10-NEXT: v_pk_sub_i16 v1, s1, s4 clamp 3625; GFX10-NEXT: v_pk_sub_i16 v2, s2, s5 clamp 3626; GFX10-NEXT: v_readfirstlane_b32 s0, v0 3627; GFX10-NEXT: v_readfirstlane_b32 s1, v1 3628; GFX10-NEXT: v_readfirstlane_b32 s2, v2 3629; GFX10-NEXT: ; return to shader part epilog 3630 %result = call <6 x i16> @llvm.ssub.sat.v6i16(<6 x i16> %lhs, <6 x i16> %rhs) 3631 %cast = bitcast <6 x i16> %result to <3 x i32> 3632 ret <3 x i32> %cast 3633} 3634 3635define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { 3636; GFX6-LABEL: v_ssubsat_v8i16: 3637; GFX6: ; %bb.0: 3638; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3639; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3640; GFX6-NEXT: s_brev_b32 s4, -2 3641; GFX6-NEXT: v_max_i32_e32 v16, -1, v0 3642; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v8 3643; GFX6-NEXT: s_brev_b32 s5, 1 3644; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, s4, v16 3645; GFX6-NEXT: v_min_i32_e32 v18, -1, v0 3646; GFX6-NEXT: v_subrev_i32_e32 v18, vcc, s5, v18 3647; GFX6-NEXT: v_max_i32_e32 v8, v16, v8 3648; GFX6-NEXT: v_min_i32_e32 v8, v8, v18 3649; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3650; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 3651; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v9 3652; GFX6-NEXT: v_max_i32_e32 v9, -1, v1 3653; GFX6-NEXT: v_subrev_i32_e32 v9, vcc, s4, v9 3654; GFX6-NEXT: v_min_i32_e32 v16, -1, v1 3655; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, s5, v16 3656; GFX6-NEXT: v_max_i32_e32 v8, v9, v8 3657; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3658; GFX6-NEXT: v_bfrev_b32_e32 v17, -2 3659; GFX6-NEXT: v_min_i32_e32 v8, v8, v16 3660; GFX6-NEXT: v_max_i32_e32 v9, -1, v2 3661; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, 
v8 3662; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v10 3663; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17 3664; GFX6-NEXT: v_min_i32_e32 v10, -1, v2 3665; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, s5, v10 3666; GFX6-NEXT: v_max_i32_e32 v8, v9, v8 3667; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3668; GFX6-NEXT: v_min_i32_e32 v8, v8, v10 3669; GFX6-NEXT: v_max_i32_e32 v9, -1, v3 3670; GFX6-NEXT: v_bfrev_b32_e32 v19, 1 3671; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 3672; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v11 3673; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17 3674; GFX6-NEXT: v_min_i32_e32 v10, -1, v3 3675; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19 3676; GFX6-NEXT: v_max_i32_e32 v8, v9, v8 3677; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 3678; GFX6-NEXT: v_min_i32_e32 v8, v8, v10 3679; GFX6-NEXT: v_max_i32_e32 v9, -1, v4 3680; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v8 3681; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v12 3682; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17 3683; GFX6-NEXT: v_min_i32_e32 v10, -1, v4 3684; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19 3685; GFX6-NEXT: v_max_i32_e32 v8, v9, v8 3686; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5 3687; GFX6-NEXT: v_min_i32_e32 v8, v8, v10 3688; GFX6-NEXT: v_max_i32_e32 v9, -1, v5 3689; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v8 3690; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v13 3691; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17 3692; GFX6-NEXT: v_min_i32_e32 v10, -1, v5 3693; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19 3694; GFX6-NEXT: v_max_i32_e32 v8, v9, v8 3695; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v6 3696; GFX6-NEXT: v_min_i32_e32 v8, v8, v10 3697; GFX6-NEXT: v_max_i32_e32 v9, -1, v6 3698; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v8 3699; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v14 3700; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17 3701; GFX6-NEXT: v_min_i32_e32 v10, -1, v6 3702; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19 3703; GFX6-NEXT: v_max_i32_e32 v8, v9, v8 3704; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7 3705; GFX6-NEXT: v_min_i32_e32 v8, v8, v10 
3706; GFX6-NEXT: v_max_i32_e32 v9, -1, v7 3707; GFX6-NEXT: v_ashrrev_i32_e32 v1, 16, v1 3708; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v8 3709; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v15 3710; GFX6-NEXT: v_sub_i32_e32 v9, vcc, v9, v17 3711; GFX6-NEXT: v_min_i32_e32 v10, -1, v7 3712; GFX6-NEXT: s_mov_b32 s4, 0xffff 3713; GFX6-NEXT: v_ashrrev_i32_e32 v0, 16, v0 3714; GFX6-NEXT: v_sub_i32_e32 v10, vcc, v10, v19 3715; GFX6-NEXT: v_max_i32_e32 v8, v9, v8 3716; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 3717; GFX6-NEXT: v_ashrrev_i32_e32 v2, 16, v2 3718; GFX6-NEXT: v_ashrrev_i32_e32 v3, 16, v3 3719; GFX6-NEXT: v_min_i32_e32 v8, v8, v10 3720; GFX6-NEXT: v_and_b32_e32 v0, s4, v0 3721; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3722; GFX6-NEXT: v_ashrrev_i32_e32 v5, 16, v5 3723; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v8 3724; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3725; GFX6-NEXT: v_and_b32_e32 v1, s4, v2 3726; GFX6-NEXT: v_and_b32_e32 v2, s4, v3 3727; GFX6-NEXT: v_ashrrev_i32_e32 v4, 16, v4 3728; GFX6-NEXT: v_ashrrev_i32_e32 v7, 16, v7 3729; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3730; GFX6-NEXT: v_and_b32_e32 v3, s4, v5 3731; GFX6-NEXT: v_ashrrev_i32_e32 v6, 16, v6 3732; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 3733; GFX6-NEXT: v_and_b32_e32 v2, s4, v4 3734; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3735; GFX6-NEXT: v_and_b32_e32 v4, s4, v7 3736; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 3737; GFX6-NEXT: v_and_b32_e32 v3, s4, v6 3738; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 3739; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 3740; GFX6-NEXT: s_setpc_b64 s[30:31] 3741; 3742; GFX8-LABEL: v_ssubsat_v8i16: 3743; GFX8: ; %bb.0: 3744; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3745; GFX8-NEXT: s_movk_i32 s4, 0x7fff 3746; GFX8-NEXT: v_max_i16_e32 v12, -1, v0 3747; GFX8-NEXT: s_movk_i32 s5, 0x8000 3748; GFX8-NEXT: v_subrev_u16_e32 v12, s4, v12 3749; GFX8-NEXT: v_min_i16_e32 v14, -1, v0 3750; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v0 3751; GFX8-NEXT: v_subrev_u16_e32 v14, s5, v14 3752; GFX8-NEXT: v_max_i16_e32 v12, 
v12, v4 3753; GFX8-NEXT: v_min_i16_e32 v12, v12, v14 3754; GFX8-NEXT: v_max_i16_e32 v14, -1, v8 3755; GFX8-NEXT: v_subrev_u16_e32 v14, s4, v14 3756; GFX8-NEXT: v_min_i16_e32 v16, -1, v8 3757; GFX8-NEXT: v_subrev_u16_e32 v16, s5, v16 3758; GFX8-NEXT: v_max_i16_sdwa v4, v14, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3759; GFX8-NEXT: v_max_i16_e32 v14, -1, v1 3760; GFX8-NEXT: v_min_i16_e32 v4, v4, v16 3761; GFX8-NEXT: v_subrev_u16_e32 v14, s4, v14 3762; GFX8-NEXT: v_min_i16_e32 v16, -1, v1 3763; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v1 3764; GFX8-NEXT: v_subrev_u16_e32 v16, s5, v16 3765; GFX8-NEXT: v_max_i16_e32 v14, v14, v5 3766; GFX8-NEXT: v_min_i16_e32 v14, v14, v16 3767; GFX8-NEXT: v_max_i16_e32 v16, -1, v9 3768; GFX8-NEXT: v_subrev_u16_e32 v16, s4, v16 3769; GFX8-NEXT: v_min_i16_e32 v17, -1, v9 3770; GFX8-NEXT: v_mov_b32_e32 v13, 0x7fff 3771; GFX8-NEXT: v_subrev_u16_e32 v17, s5, v17 3772; GFX8-NEXT: v_max_i16_sdwa v5, v16, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3773; GFX8-NEXT: v_max_i16_e32 v16, -1, v2 3774; GFX8-NEXT: v_mov_b32_e32 v15, 0xffff8000 3775; GFX8-NEXT: v_min_i16_e32 v5, v5, v17 3776; GFX8-NEXT: v_sub_u16_e32 v16, v16, v13 3777; GFX8-NEXT: v_min_i16_e32 v17, -1, v2 3778; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v2 3779; GFX8-NEXT: v_sub_u16_e32 v17, v17, v15 3780; GFX8-NEXT: v_max_i16_e32 v16, v16, v6 3781; GFX8-NEXT: v_min_i16_e32 v16, v16, v17 3782; GFX8-NEXT: v_max_i16_e32 v17, -1, v10 3783; GFX8-NEXT: v_sub_u16_e32 v17, v17, v13 3784; GFX8-NEXT: v_min_i16_e32 v18, -1, v10 3785; GFX8-NEXT: v_sub_u16_e32 v18, v18, v15 3786; GFX8-NEXT: v_max_i16_sdwa v6, v17, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3787; GFX8-NEXT: v_max_i16_e32 v17, -1, v3 3788; GFX8-NEXT: v_min_i16_e32 v6, v6, v18 3789; GFX8-NEXT: v_sub_u16_e32 v17, v17, v13 3790; GFX8-NEXT: v_min_i16_e32 v18, -1, v3 3791; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v3 3792; GFX8-NEXT: v_sub_u16_e32 v18, v18, v15 3793; 
GFX8-NEXT: v_max_i16_e32 v17, v17, v7 3794; GFX8-NEXT: v_min_i16_e32 v17, v17, v18 3795; GFX8-NEXT: v_max_i16_e32 v18, -1, v11 3796; GFX8-NEXT: v_sub_u16_e32 v13, v18, v13 3797; GFX8-NEXT: v_min_i16_e32 v18, -1, v11 3798; GFX8-NEXT: v_sub_u16_e32 v0, v0, v12 3799; GFX8-NEXT: v_sub_u16_sdwa v4, v8, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 3800; GFX8-NEXT: v_sub_u16_e32 v15, v18, v15 3801; GFX8-NEXT: v_max_i16_sdwa v7, v13, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3802; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 3803; GFX8-NEXT: v_sub_u16_e32 v1, v1, v14 3804; GFX8-NEXT: v_sub_u16_sdwa v4, v9, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 3805; GFX8-NEXT: v_min_i16_e32 v7, v7, v15 3806; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 3807; GFX8-NEXT: v_sub_u16_e32 v2, v2, v16 3808; GFX8-NEXT: v_sub_u16_sdwa v4, v10, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 3809; GFX8-NEXT: v_or_b32_e32 v2, v2, v4 3810; GFX8-NEXT: v_sub_u16_e32 v3, v3, v17 3811; GFX8-NEXT: v_sub_u16_sdwa v4, v11, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 3812; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 3813; GFX8-NEXT: s_setpc_b64 s[30:31] 3814; 3815; GFX9-LABEL: v_ssubsat_v8i16: 3816; GFX9: ; %bb.0: 3817; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3818; GFX9-NEXT: v_pk_sub_i16 v0, v0, v4 clamp 3819; GFX9-NEXT: v_pk_sub_i16 v1, v1, v5 clamp 3820; GFX9-NEXT: v_pk_sub_i16 v2, v2, v6 clamp 3821; GFX9-NEXT: v_pk_sub_i16 v3, v3, v7 clamp 3822; GFX9-NEXT: s_setpc_b64 s[30:31] 3823; 3824; GFX10-LABEL: v_ssubsat_v8i16: 3825; GFX10: ; %bb.0: 3826; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3827; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3828; GFX10-NEXT: v_pk_sub_i16 v0, v0, v4 clamp 3829; GFX10-NEXT: v_pk_sub_i16 v1, v1, v5 clamp 3830; GFX10-NEXT: v_pk_sub_i16 v2, v2, v6 clamp 3831; GFX10-NEXT: v_pk_sub_i16 v3, v3, v7 clamp 3832; GFX10-NEXT: s_setpc_b64 s[30:31] 3833 %result = call <8 x 
i16> @llvm.ssub.sat.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) 3834 %cast = bitcast <8 x i16> %result to <4 x float> 3835 ret <4 x float> %cast 3836} 3837 3838define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inreg %rhs) { 3839; GFX6-LABEL: s_ssubsat_v8i16: 3840; GFX6: ; %bb.0: 3841; GFX6-NEXT: s_lshl_b32 s0, s0, 16 3842; GFX6-NEXT: s_brev_b32 s16, -2 3843; GFX6-NEXT: s_max_i32 s18, s0, -1 3844; GFX6-NEXT: s_lshl_b32 s8, s8, 16 3845; GFX6-NEXT: s_brev_b32 s17, 1 3846; GFX6-NEXT: s_sub_i32 s18, s18, s16 3847; GFX6-NEXT: s_min_i32 s19, s0, -1 3848; GFX6-NEXT: s_sub_i32 s19, s19, s17 3849; GFX6-NEXT: s_max_i32 s8, s18, s8 3850; GFX6-NEXT: s_min_i32 s8, s8, s19 3851; GFX6-NEXT: s_lshl_b32 s1, s1, 16 3852; GFX6-NEXT: s_sub_i32 s0, s0, s8 3853; GFX6-NEXT: s_lshl_b32 s8, s9, 16 3854; GFX6-NEXT: s_max_i32 s9, s1, -1 3855; GFX6-NEXT: s_sub_i32 s9, s9, s16 3856; GFX6-NEXT: s_min_i32 s18, s1, -1 3857; GFX6-NEXT: s_sub_i32 s18, s18, s17 3858; GFX6-NEXT: s_max_i32 s8, s9, s8 3859; GFX6-NEXT: s_lshl_b32 s2, s2, 16 3860; GFX6-NEXT: s_min_i32 s8, s8, s18 3861; GFX6-NEXT: s_max_i32 s9, s2, -1 3862; GFX6-NEXT: s_sub_i32 s1, s1, s8 3863; GFX6-NEXT: s_lshl_b32 s8, s10, 16 3864; GFX6-NEXT: s_sub_i32 s9, s9, s16 3865; GFX6-NEXT: s_min_i32 s10, s2, -1 3866; GFX6-NEXT: s_sub_i32 s10, s10, s17 3867; GFX6-NEXT: s_max_i32 s8, s9, s8 3868; GFX6-NEXT: s_lshl_b32 s3, s3, 16 3869; GFX6-NEXT: s_min_i32 s8, s8, s10 3870; GFX6-NEXT: s_max_i32 s9, s3, -1 3871; GFX6-NEXT: s_sub_i32 s2, s2, s8 3872; GFX6-NEXT: s_lshl_b32 s8, s11, 16 3873; GFX6-NEXT: s_sub_i32 s9, s9, s16 3874; GFX6-NEXT: s_min_i32 s10, s3, -1 3875; GFX6-NEXT: s_sub_i32 s10, s10, s17 3876; GFX6-NEXT: s_max_i32 s8, s9, s8 3877; GFX6-NEXT: s_lshl_b32 s4, s4, 16 3878; GFX6-NEXT: s_min_i32 s8, s8, s10 3879; GFX6-NEXT: s_max_i32 s9, s4, -1 3880; GFX6-NEXT: s_sub_i32 s3, s3, s8 3881; GFX6-NEXT: s_lshl_b32 s8, s12, 16 3882; GFX6-NEXT: s_sub_i32 s9, s9, s16 3883; GFX6-NEXT: s_min_i32 s10, s4, -1 3884; GFX6-NEXT: s_sub_i32 
s10, s10, s17 3885; GFX6-NEXT: s_max_i32 s8, s9, s8 3886; GFX6-NEXT: s_lshl_b32 s5, s5, 16 3887; GFX6-NEXT: s_min_i32 s8, s8, s10 3888; GFX6-NEXT: s_max_i32 s9, s5, -1 3889; GFX6-NEXT: s_sub_i32 s4, s4, s8 3890; GFX6-NEXT: s_lshl_b32 s8, s13, 16 3891; GFX6-NEXT: s_sub_i32 s9, s9, s16 3892; GFX6-NEXT: s_min_i32 s10, s5, -1 3893; GFX6-NEXT: s_sub_i32 s10, s10, s17 3894; GFX6-NEXT: s_max_i32 s8, s9, s8 3895; GFX6-NEXT: s_lshl_b32 s6, s6, 16 3896; GFX6-NEXT: s_min_i32 s8, s8, s10 3897; GFX6-NEXT: s_max_i32 s9, s6, -1 3898; GFX6-NEXT: s_sub_i32 s5, s5, s8 3899; GFX6-NEXT: s_lshl_b32 s8, s14, 16 3900; GFX6-NEXT: s_sub_i32 s9, s9, s16 3901; GFX6-NEXT: s_min_i32 s10, s6, -1 3902; GFX6-NEXT: s_sub_i32 s10, s10, s17 3903; GFX6-NEXT: s_max_i32 s8, s9, s8 3904; GFX6-NEXT: s_lshl_b32 s7, s7, 16 3905; GFX6-NEXT: s_min_i32 s8, s8, s10 3906; GFX6-NEXT: s_max_i32 s9, s7, -1 3907; GFX6-NEXT: s_sub_i32 s6, s6, s8 3908; GFX6-NEXT: s_lshl_b32 s8, s15, 16 3909; GFX6-NEXT: s_sub_i32 s9, s9, s16 3910; GFX6-NEXT: s_min_i32 s10, s7, -1 3911; GFX6-NEXT: s_sub_i32 s10, s10, s17 3912; GFX6-NEXT: s_max_i32 s8, s9, s8 3913; GFX6-NEXT: s_min_i32 s8, s8, s10 3914; GFX6-NEXT: s_ashr_i32 s1, s1, 16 3915; GFX6-NEXT: s_sub_i32 s7, s7, s8 3916; GFX6-NEXT: s_mov_b32 s8, 0xffff 3917; GFX6-NEXT: s_ashr_i32 s0, s0, 16 3918; GFX6-NEXT: s_and_b32 s1, s1, s8 3919; GFX6-NEXT: s_ashr_i32 s2, s2, 16 3920; GFX6-NEXT: s_ashr_i32 s3, s3, 16 3921; GFX6-NEXT: s_and_b32 s0, s0, s8 3922; GFX6-NEXT: s_lshl_b32 s1, s1, 16 3923; GFX6-NEXT: s_ashr_i32 s5, s5, 16 3924; GFX6-NEXT: s_or_b32 s0, s0, s1 3925; GFX6-NEXT: s_and_b32 s1, s2, s8 3926; GFX6-NEXT: s_and_b32 s2, s3, s8 3927; GFX6-NEXT: s_ashr_i32 s4, s4, 16 3928; GFX6-NEXT: s_ashr_i32 s7, s7, 16 3929; GFX6-NEXT: s_lshl_b32 s2, s2, 16 3930; GFX6-NEXT: s_and_b32 s3, s5, s8 3931; GFX6-NEXT: s_ashr_i32 s6, s6, 16 3932; GFX6-NEXT: s_or_b32 s1, s1, s2 3933; GFX6-NEXT: s_and_b32 s2, s4, s8 3934; GFX6-NEXT: s_lshl_b32 s3, s3, 16 3935; GFX6-NEXT: s_and_b32 s4, s7, s8 3936; 
GFX6-NEXT: s_or_b32 s2, s2, s3 3937; GFX6-NEXT: s_and_b32 s3, s6, s8 3938; GFX6-NEXT: s_lshl_b32 s4, s4, 16 3939; GFX6-NEXT: s_or_b32 s3, s3, s4 3940; GFX6-NEXT: ; return to shader part epilog 3941; 3942; GFX8-LABEL: s_ssubsat_v8i16: 3943; GFX8: ; %bb.0: 3944; GFX8-NEXT: s_sext_i32_i16 s18, s0 3945; GFX8-NEXT: s_sext_i32_i16 s19, -1 3946; GFX8-NEXT: s_movk_i32 s16, 0x7fff 3947; GFX8-NEXT: s_max_i32 s20, s18, s19 3948; GFX8-NEXT: s_sub_i32 s20, s20, s16 3949; GFX8-NEXT: s_lshr_b32 s12, s4, 16 3950; GFX8-NEXT: s_movk_i32 s17, 0x8000 3951; GFX8-NEXT: s_min_i32 s18, s18, s19 3952; GFX8-NEXT: s_sext_i32_i16 s20, s20 3953; GFX8-NEXT: s_sext_i32_i16 s4, s4 3954; GFX8-NEXT: s_sub_i32 s18, s18, s17 3955; GFX8-NEXT: s_max_i32 s4, s20, s4 3956; GFX8-NEXT: s_sext_i32_i16 s4, s4 3957; GFX8-NEXT: s_sext_i32_i16 s18, s18 3958; GFX8-NEXT: s_lshr_b32 s8, s0, 16 3959; GFX8-NEXT: s_min_i32 s4, s4, s18 3960; GFX8-NEXT: s_sub_i32 s0, s0, s4 3961; GFX8-NEXT: s_sext_i32_i16 s4, s8 3962; GFX8-NEXT: s_max_i32 s18, s4, s19 3963; GFX8-NEXT: s_sub_i32 s18, s18, s16 3964; GFX8-NEXT: s_min_i32 s4, s4, s19 3965; GFX8-NEXT: s_sext_i32_i16 s18, s18 3966; GFX8-NEXT: s_sext_i32_i16 s12, s12 3967; GFX8-NEXT: s_sub_i32 s4, s4, s17 3968; GFX8-NEXT: s_max_i32 s12, s18, s12 3969; GFX8-NEXT: s_sext_i32_i16 s12, s12 3970; GFX8-NEXT: s_sext_i32_i16 s4, s4 3971; GFX8-NEXT: s_min_i32 s4, s12, s4 3972; GFX8-NEXT: s_sub_i32 s4, s8, s4 3973; GFX8-NEXT: s_sext_i32_i16 s8, s1 3974; GFX8-NEXT: s_max_i32 s12, s8, s19 3975; GFX8-NEXT: s_sub_i32 s12, s12, s16 3976; GFX8-NEXT: s_lshr_b32 s13, s5, 16 3977; GFX8-NEXT: s_min_i32 s8, s8, s19 3978; GFX8-NEXT: s_sext_i32_i16 s12, s12 3979; GFX8-NEXT: s_sext_i32_i16 s5, s5 3980; GFX8-NEXT: s_sub_i32 s8, s8, s17 3981; GFX8-NEXT: s_max_i32 s5, s12, s5 3982; GFX8-NEXT: s_sext_i32_i16 s5, s5 3983; GFX8-NEXT: s_sext_i32_i16 s8, s8 3984; GFX8-NEXT: s_lshr_b32 s9, s1, 16 3985; GFX8-NEXT: s_min_i32 s5, s5, s8 3986; GFX8-NEXT: s_sub_i32 s1, s1, s5 3987; GFX8-NEXT: s_sext_i32_i16 s5, 
s9 3988; GFX8-NEXT: s_max_i32 s8, s5, s19 3989; GFX8-NEXT: s_sub_i32 s8, s8, s16 3990; GFX8-NEXT: s_min_i32 s5, s5, s19 3991; GFX8-NEXT: s_sext_i32_i16 s8, s8 3992; GFX8-NEXT: s_sext_i32_i16 s12, s13 3993; GFX8-NEXT: s_sub_i32 s5, s5, s17 3994; GFX8-NEXT: s_max_i32 s8, s8, s12 3995; GFX8-NEXT: s_sext_i32_i16 s8, s8 3996; GFX8-NEXT: s_sext_i32_i16 s5, s5 3997; GFX8-NEXT: s_min_i32 s5, s8, s5 3998; GFX8-NEXT: s_sext_i32_i16 s8, s2 3999; GFX8-NEXT: s_sub_i32 s5, s9, s5 4000; GFX8-NEXT: s_max_i32 s9, s8, s19 4001; GFX8-NEXT: s_sub_i32 s9, s9, s16 4002; GFX8-NEXT: s_lshr_b32 s14, s6, 16 4003; GFX8-NEXT: s_min_i32 s8, s8, s19 4004; GFX8-NEXT: s_sext_i32_i16 s9, s9 4005; GFX8-NEXT: s_sext_i32_i16 s6, s6 4006; GFX8-NEXT: s_sub_i32 s8, s8, s17 4007; GFX8-NEXT: s_max_i32 s6, s9, s6 4008; GFX8-NEXT: s_sext_i32_i16 s6, s6 4009; GFX8-NEXT: s_sext_i32_i16 s8, s8 4010; GFX8-NEXT: s_lshr_b32 s10, s2, 16 4011; GFX8-NEXT: s_min_i32 s6, s6, s8 4012; GFX8-NEXT: s_sub_i32 s2, s2, s6 4013; GFX8-NEXT: s_sext_i32_i16 s6, s10 4014; GFX8-NEXT: s_max_i32 s8, s6, s19 4015; GFX8-NEXT: s_sub_i32 s8, s8, s16 4016; GFX8-NEXT: s_min_i32 s6, s6, s19 4017; GFX8-NEXT: s_sext_i32_i16 s8, s8 4018; GFX8-NEXT: s_sext_i32_i16 s9, s14 4019; GFX8-NEXT: s_sub_i32 s6, s6, s17 4020; GFX8-NEXT: s_max_i32 s8, s8, s9 4021; GFX8-NEXT: s_sext_i32_i16 s8, s8 4022; GFX8-NEXT: s_sext_i32_i16 s6, s6 4023; GFX8-NEXT: s_min_i32 s6, s8, s6 4024; GFX8-NEXT: s_sext_i32_i16 s8, s3 4025; GFX8-NEXT: s_max_i32 s9, s8, s19 4026; GFX8-NEXT: s_sub_i32 s9, s9, s16 4027; GFX8-NEXT: s_lshr_b32 s15, s7, 16 4028; GFX8-NEXT: s_min_i32 s8, s8, s19 4029; GFX8-NEXT: s_sext_i32_i16 s9, s9 4030; GFX8-NEXT: s_sext_i32_i16 s7, s7 4031; GFX8-NEXT: s_sub_i32 s8, s8, s17 4032; GFX8-NEXT: s_max_i32 s7, s9, s7 4033; GFX8-NEXT: s_sext_i32_i16 s7, s7 4034; GFX8-NEXT: s_sext_i32_i16 s8, s8 4035; GFX8-NEXT: s_lshr_b32 s11, s3, 16 4036; GFX8-NEXT: s_min_i32 s7, s7, s8 4037; GFX8-NEXT: s_sub_i32 s3, s3, s7 4038; GFX8-NEXT: s_sext_i32_i16 s7, s11 4039; 
GFX8-NEXT: s_max_i32 s8, s7, s19 4040; GFX8-NEXT: s_sub_i32 s8, s8, s16 4041; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 4042; GFX8-NEXT: s_min_i32 s7, s7, s19 4043; GFX8-NEXT: s_sext_i32_i16 s8, s8 4044; GFX8-NEXT: s_sext_i32_i16 s9, s15 4045; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 4046; GFX8-NEXT: s_lshl_b32 s4, s4, 16 4047; GFX8-NEXT: s_sub_i32 s7, s7, s17 4048; GFX8-NEXT: s_max_i32 s8, s8, s9 4049; GFX8-NEXT: s_or_b32 s0, s0, s4 4050; GFX8-NEXT: s_bfe_u32 s4, s5, 0x100000 4051; GFX8-NEXT: s_sub_i32 s6, s10, s6 4052; GFX8-NEXT: s_sext_i32_i16 s8, s8 4053; GFX8-NEXT: s_sext_i32_i16 s7, s7 4054; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 4055; GFX8-NEXT: s_lshl_b32 s4, s4, 16 4056; GFX8-NEXT: s_min_i32 s7, s8, s7 4057; GFX8-NEXT: s_or_b32 s1, s1, s4 4058; GFX8-NEXT: s_bfe_u32 s4, s6, 0x100000 4059; GFX8-NEXT: s_sub_i32 s7, s11, s7 4060; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 4061; GFX8-NEXT: s_lshl_b32 s4, s4, 16 4062; GFX8-NEXT: s_or_b32 s2, s2, s4 4063; GFX8-NEXT: s_bfe_u32 s4, s7, 0x100000 4064; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 4065; GFX8-NEXT: s_lshl_b32 s4, s4, 16 4066; GFX8-NEXT: s_or_b32 s3, s3, s4 4067; GFX8-NEXT: ; return to shader part epilog 4068; 4069; GFX9-LABEL: s_ssubsat_v8i16: 4070; GFX9: ; %bb.0: 4071; GFX9-NEXT: v_mov_b32_e32 v0, s4 4072; GFX9-NEXT: v_mov_b32_e32 v1, s5 4073; GFX9-NEXT: v_mov_b32_e32 v2, s6 4074; GFX9-NEXT: v_mov_b32_e32 v3, s7 4075; GFX9-NEXT: v_pk_sub_i16 v0, s0, v0 clamp 4076; GFX9-NEXT: v_pk_sub_i16 v1, s1, v1 clamp 4077; GFX9-NEXT: v_pk_sub_i16 v2, s2, v2 clamp 4078; GFX9-NEXT: v_pk_sub_i16 v3, s3, v3 clamp 4079; GFX9-NEXT: v_readfirstlane_b32 s0, v0 4080; GFX9-NEXT: v_readfirstlane_b32 s1, v1 4081; GFX9-NEXT: v_readfirstlane_b32 s2, v2 4082; GFX9-NEXT: v_readfirstlane_b32 s3, v3 4083; GFX9-NEXT: ; return to shader part epilog 4084; 4085; GFX10-LABEL: s_ssubsat_v8i16: 4086; GFX10: ; %bb.0: 4087; GFX10-NEXT: v_pk_sub_i16 v0, s0, s4 clamp 4088; GFX10-NEXT: v_pk_sub_i16 v1, s1, s5 clamp 4089; GFX10-NEXT: v_pk_sub_i16 v2, s2, s6 clamp 
4090; GFX10-NEXT: v_pk_sub_i16 v3, s3, s7 clamp 4091; GFX10-NEXT: v_readfirstlane_b32 s0, v0 4092; GFX10-NEXT: v_readfirstlane_b32 s1, v1 4093; GFX10-NEXT: v_readfirstlane_b32 s2, v2 4094; GFX10-NEXT: v_readfirstlane_b32 s3, v3 4095; GFX10-NEXT: ; return to shader part epilog 4096 %result = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) 4097 %cast = bitcast <8 x i16> %result to <4 x i32> 4098 ret <4 x i32> %cast 4099} 4100 4101; FIXME: i48 broken because i48 add broken 4102; define i48 @v_ssubsat_i48(i48 %lhs, i48 %rhs) { 4103; %result = call i48 @llvm.ssub.sat.i48(i48 %lhs, i48 %rhs) 4104; ret i48 %result 4105; } 4106 4107; define amdgpu_ps i48 @s_ssubsat_i48(i48 inreg %lhs, i48 inreg %rhs) { 4108; %result = call i48 @llvm.ssub.sat.i48(i48 %lhs, i48 %rhs) 4109; ret i48 %result 4110; } 4111 4112; define amdgpu_ps <2 x float> @ssubsat_i48_sv(i48 inreg %lhs, i48 %rhs) { 4113; %result = call i48 @llvm.ssub.sat.i48(i48 %lhs, i48 %rhs) 4114; %ext.result = zext i48 %result to i64 4115; %cast = bitcast i64 %ext.result to <2 x float> 4116; ret <2 x float> %cast 4117; } 4118 4119; define amdgpu_ps <2 x float> @ssubsat_i48_vs(i48 %lhs, i48 inreg %rhs) { 4120; %result = call i48 @llvm.ssub.sat.i48(i48 %lhs, i48 %rhs) 4121; %ext.result = zext i48 %result to i64 4122; %cast = bitcast i64 %ext.result to <2 x float> 4123; ret <2 x float> %cast 4124; } 4125 4126define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) { 4127; GFX6-LABEL: v_ssubsat_i64: 4128; GFX6: ; %bb.0: 4129; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4130; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v0, v2 4131; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc 4132; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1] 4133; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3] 4134; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v5 4135; GFX6-NEXT: v_bfrev_b32_e32 v1, 1 4136; GFX6-NEXT: v_add_i32_e64 v2, s[6:7], 0, v0 4137; GFX6-NEXT: v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7] 4138; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc 4139; 
GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc 4140; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 4141; GFX6-NEXT: s_setpc_b64 s[30:31] 4142; 4143; GFX8-LABEL: v_ssubsat_i64: 4144; GFX8: ; %bb.0: 4145; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4146; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v0, v2 4147; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc 4148; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1] 4149; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3] 4150; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v5 4151; GFX8-NEXT: v_bfrev_b32_e32 v1, 1 4152; GFX8-NEXT: v_add_u32_e64 v2, s[6:7], 0, v0 4153; GFX8-NEXT: v_addc_u32_e64 v1, s[6:7], v0, v1, s[6:7] 4154; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc 4155; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc 4156; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 4157; GFX8-NEXT: s_setpc_b64 s[30:31] 4158; 4159; GFX9-LABEL: v_ssubsat_i64: 4160; GFX9: ; %bb.0: 4161; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4162; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v2 4163; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc 4164; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1] 4165; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3] 4166; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v5 4167; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 4168; GFX9-NEXT: v_add_co_u32_e64 v2, s[6:7], 0, v0 4169; GFX9-NEXT: v_addc_co_u32_e64 v1, s[6:7], v0, v1, s[6:7] 4170; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc 4171; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc 4172; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 4173; GFX9-NEXT: s_setpc_b64 s[30:31] 4174; 4175; GFX10-LABEL: v_ssubsat_i64: 4176; GFX10: ; %bb.0: 4177; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4178; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4179; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2 4180; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo 4181; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, 0, v[2:3] 4182; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5 4183; GFX10-NEXT: v_cmp_lt_i64_e64 s4, v[4:5], v[0:1] 4184; GFX10-NEXT: 
v_add_co_u32 v0, s5, v6, 0 4185; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s5, 0x80000000, v6, s5 4186; GFX10-NEXT: s_xor_b32 vcc_lo, vcc_lo, s4 4187; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo 4188; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo 4189; GFX10-NEXT: s_setpc_b64 s[30:31] 4190 %result = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs) 4191 ret i64 %result 4192} 4193 4194define amdgpu_ps i64 @s_ssubsat_i64(i64 inreg %lhs, i64 inreg %rhs) { 4195; GFX6-LABEL: s_ssubsat_i64: 4196; GFX6: ; %bb.0: 4197; GFX6-NEXT: s_sub_u32 s4, s0, s2 4198; GFX6-NEXT: s_cselect_b32 s5, 1, 0 4199; GFX6-NEXT: s_and_b32 s5, s5, 1 4200; GFX6-NEXT: s_cmp_lg_u32 s5, 0 4201; GFX6-NEXT: v_mov_b32_e32 v0, s0 4202; GFX6-NEXT: s_subb_u32 s5, s1, s3 4203; GFX6-NEXT: v_mov_b32_e32 v1, s1 4204; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1] 4205; GFX6-NEXT: v_cmp_gt_i64_e64 s[0:1], s[2:3], 0 4206; GFX6-NEXT: s_ashr_i32 s2, s5, 31 4207; GFX6-NEXT: s_xor_b64 vcc, s[0:1], vcc 4208; GFX6-NEXT: s_add_u32 s0, s2, 0 4209; GFX6-NEXT: s_cselect_b32 s1, 1, 0 4210; GFX6-NEXT: s_and_b32 s1, s1, 1 4211; GFX6-NEXT: s_cmp_lg_u32 s1, 0 4212; GFX6-NEXT: s_addc_u32 s1, s2, 0x80000000 4213; GFX6-NEXT: v_mov_b32_e32 v0, s4 4214; GFX6-NEXT: v_mov_b32_e32 v1, s0 4215; GFX6-NEXT: v_mov_b32_e32 v2, s1 4216; GFX6-NEXT: v_mov_b32_e32 v3, s5 4217; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 4218; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc 4219; GFX6-NEXT: v_readfirstlane_b32 s0, v0 4220; GFX6-NEXT: v_readfirstlane_b32 s1, v1 4221; GFX6-NEXT: ; return to shader part epilog 4222; 4223; GFX8-LABEL: s_ssubsat_i64: 4224; GFX8: ; %bb.0: 4225; GFX8-NEXT: s_sub_u32 s4, s0, s2 4226; GFX8-NEXT: s_cselect_b32 s5, 1, 0 4227; GFX8-NEXT: s_and_b32 s5, s5, 1 4228; GFX8-NEXT: s_cmp_lg_u32 s5, 0 4229; GFX8-NEXT: v_mov_b32_e32 v0, s0 4230; GFX8-NEXT: s_subb_u32 s5, s1, s3 4231; GFX8-NEXT: v_mov_b32_e32 v1, s1 4232; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1] 4233; GFX8-NEXT: v_cmp_gt_i64_e64 s[0:1], s[2:3], 0 4234; 
GFX8-NEXT: s_ashr_i32 s2, s5, 31 4235; GFX8-NEXT: s_xor_b64 vcc, s[0:1], vcc 4236; GFX8-NEXT: s_add_u32 s0, s2, 0 4237; GFX8-NEXT: s_cselect_b32 s1, 1, 0 4238; GFX8-NEXT: s_and_b32 s1, s1, 1 4239; GFX8-NEXT: s_cmp_lg_u32 s1, 0 4240; GFX8-NEXT: s_addc_u32 s1, s2, 0x80000000 4241; GFX8-NEXT: v_mov_b32_e32 v0, s4 4242; GFX8-NEXT: v_mov_b32_e32 v1, s0 4243; GFX8-NEXT: v_mov_b32_e32 v2, s1 4244; GFX8-NEXT: v_mov_b32_e32 v3, s5 4245; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 4246; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc 4247; GFX8-NEXT: v_readfirstlane_b32 s0, v0 4248; GFX8-NEXT: v_readfirstlane_b32 s1, v1 4249; GFX8-NEXT: ; return to shader part epilog 4250; 4251; GFX9-LABEL: s_ssubsat_i64: 4252; GFX9: ; %bb.0: 4253; GFX9-NEXT: s_sub_u32 s4, s0, s2 4254; GFX9-NEXT: s_cselect_b32 s5, 1, 0 4255; GFX9-NEXT: s_and_b32 s5, s5, 1 4256; GFX9-NEXT: s_cmp_lg_u32 s5, 0 4257; GFX9-NEXT: v_mov_b32_e32 v0, s0 4258; GFX9-NEXT: s_subb_u32 s5, s1, s3 4259; GFX9-NEXT: v_mov_b32_e32 v1, s1 4260; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1] 4261; GFX9-NEXT: v_cmp_gt_i64_e64 s[0:1], s[2:3], 0 4262; GFX9-NEXT: s_ashr_i32 s2, s5, 31 4263; GFX9-NEXT: s_xor_b64 vcc, s[0:1], vcc 4264; GFX9-NEXT: s_add_u32 s0, s2, 0 4265; GFX9-NEXT: s_cselect_b32 s1, 1, 0 4266; GFX9-NEXT: s_and_b32 s1, s1, 1 4267; GFX9-NEXT: s_cmp_lg_u32 s1, 0 4268; GFX9-NEXT: s_addc_u32 s1, s2, 0x80000000 4269; GFX9-NEXT: v_mov_b32_e32 v0, s4 4270; GFX9-NEXT: v_mov_b32_e32 v1, s0 4271; GFX9-NEXT: v_mov_b32_e32 v2, s1 4272; GFX9-NEXT: v_mov_b32_e32 v3, s5 4273; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 4274; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc 4275; GFX9-NEXT: v_readfirstlane_b32 s0, v0 4276; GFX9-NEXT: v_readfirstlane_b32 s1, v1 4277; GFX9-NEXT: ; return to shader part epilog 4278; 4279; GFX10-LABEL: s_ssubsat_i64: 4280; GFX10: ; %bb.0: 4281; GFX10-NEXT: s_sub_u32 s4, s0, s2 4282; GFX10-NEXT: s_cselect_b32 s5, 1, 0 4283; GFX10-NEXT: v_mov_b32_e32 v0, s4 4284; GFX10-NEXT: s_and_b32 s5, s5, 1 4285; 
GFX10-NEXT: s_cmp_lg_u32 s5, 0 4286; GFX10-NEXT: s_subb_u32 s5, s1, s3 4287; GFX10-NEXT: v_cmp_lt_i64_e64 s0, s[4:5], s[0:1] 4288; GFX10-NEXT: v_cmp_gt_i64_e64 s1, s[2:3], 0 4289; GFX10-NEXT: s_ashr_i32 s2, s5, 31 4290; GFX10-NEXT: v_mov_b32_e32 v1, s5 4291; GFX10-NEXT: s_xor_b32 s3, s1, s0 4292; GFX10-NEXT: s_add_u32 s0, s2, 0 4293; GFX10-NEXT: s_cselect_b32 s1, 1, 0 4294; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s0, s3 4295; GFX10-NEXT: s_and_b32 s1, s1, 1 4296; GFX10-NEXT: s_cmp_lg_u32 s1, 0 4297; GFX10-NEXT: s_addc_u32 s1, s2, 0x80000000 4298; GFX10-NEXT: v_readfirstlane_b32 s0, v0 4299; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s1, s3 4300; GFX10-NEXT: v_readfirstlane_b32 s1, v1 4301; GFX10-NEXT: ; return to shader part epilog 4302 %result = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs) 4303 ret i64 %result 4304} 4305 4306define amdgpu_ps <2 x float> @ssubsat_i64_sv(i64 inreg %lhs, i64 %rhs) { 4307; GFX6-LABEL: ssubsat_i64_sv: 4308; GFX6: ; %bb.0: 4309; GFX6-NEXT: v_mov_b32_e32 v3, s1 4310; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s0, v0 4311; GFX6-NEXT: v_subb_u32_e32 v3, vcc, v3, v1, vcc 4312; GFX6-NEXT: v_cmp_gt_i64_e32 vcc, s[0:1], v[2:3] 4313; GFX6-NEXT: v_cmp_lt_i64_e64 s[0:1], 0, v[0:1] 4314; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v3 4315; GFX6-NEXT: v_bfrev_b32_e32 v1, 1 4316; GFX6-NEXT: v_add_i32_e64 v4, s[2:3], 0, v0 4317; GFX6-NEXT: v_addc_u32_e64 v1, s[2:3], v0, v1, s[2:3] 4318; GFX6-NEXT: s_xor_b64 vcc, s[0:1], vcc 4319; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 4320; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 4321; GFX6-NEXT: ; return to shader part epilog 4322; 4323; GFX8-LABEL: ssubsat_i64_sv: 4324; GFX8: ; %bb.0: 4325; GFX8-NEXT: v_mov_b32_e32 v3, s1 4326; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s0, v0 4327; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v3, v1, vcc 4328; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, s[0:1], v[2:3] 4329; GFX8-NEXT: v_cmp_lt_i64_e64 s[0:1], 0, v[0:1] 4330; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v3 4331; GFX8-NEXT: v_bfrev_b32_e32 v1, 1 4332; GFX8-NEXT: 
v_add_u32_e64 v4, s[2:3], 0, v0 4333; GFX8-NEXT: v_addc_u32_e64 v1, s[2:3], v0, v1, s[2:3] 4334; GFX8-NEXT: s_xor_b64 vcc, s[0:1], vcc 4335; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 4336; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 4337; GFX8-NEXT: ; return to shader part epilog 4338; 4339; GFX9-LABEL: ssubsat_i64_sv: 4340; GFX9: ; %bb.0: 4341; GFX9-NEXT: v_mov_b32_e32 v3, s1 4342; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s0, v0 4343; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v1, vcc 4344; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, s[0:1], v[2:3] 4345; GFX9-NEXT: v_cmp_lt_i64_e64 s[0:1], 0, v[0:1] 4346; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v3 4347; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 4348; GFX9-NEXT: v_add_co_u32_e64 v4, s[2:3], 0, v0 4349; GFX9-NEXT: v_addc_co_u32_e64 v1, s[2:3], v0, v1, s[2:3] 4350; GFX9-NEXT: s_xor_b64 vcc, s[0:1], vcc 4351; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 4352; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 4353; GFX9-NEXT: ; return to shader part epilog 4354; 4355; GFX10-LABEL: ssubsat_i64_sv: 4356; GFX10: ; %bb.0: 4357; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, s0, v0 4358; GFX10-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, s1, v1, vcc_lo 4359; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, 0, v[0:1] 4360; GFX10-NEXT: v_ashrrev_i32_e32 v4, 31, v3 4361; GFX10-NEXT: v_cmp_gt_i64_e64 s0, s[0:1], v[2:3] 4362; GFX10-NEXT: v_add_co_u32 v0, s1, v4, 0 4363; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s1, 0x80000000, v4, s1 4364; GFX10-NEXT: s_xor_b32 vcc_lo, vcc_lo, s0 4365; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo 4366; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo 4367; GFX10-NEXT: ; return to shader part epilog 4368 %result = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs) 4369 %cast = bitcast i64 %result to <2 x float> 4370 ret <2 x float> %cast 4371} 4372 4373define amdgpu_ps <2 x float> @ssubsat_i64_vs(i64 %lhs, i64 inreg %rhs) { 4374; GFX6-LABEL: ssubsat_i64_vs: 4375; GFX6: ; %bb.0: 4376; GFX6-NEXT: v_mov_b32_e32 v3, s1 4377; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 
s0, v0 4378; GFX6-NEXT: v_subb_u32_e32 v3, vcc, v1, v3, vcc 4379; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[2:3], v[0:1] 4380; GFX6-NEXT: v_cmp_gt_i64_e64 s[2:3], s[0:1], 0 4381; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v3 4382; GFX6-NEXT: v_bfrev_b32_e32 v1, 1 4383; GFX6-NEXT: v_add_i32_e64 v4, s[0:1], 0, v0 4384; GFX6-NEXT: v_addc_u32_e64 v1, s[0:1], v0, v1, s[0:1] 4385; GFX6-NEXT: s_xor_b64 vcc, s[2:3], vcc 4386; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 4387; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 4388; GFX6-NEXT: ; return to shader part epilog 4389; 4390; GFX8-LABEL: ssubsat_i64_vs: 4391; GFX8: ; %bb.0: 4392; GFX8-NEXT: v_mov_b32_e32 v3, s1 4393; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s0, v0 4394; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v1, v3, vcc 4395; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[2:3], v[0:1] 4396; GFX8-NEXT: v_cmp_gt_i64_e64 s[2:3], s[0:1], 0 4397; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v3 4398; GFX8-NEXT: v_bfrev_b32_e32 v1, 1 4399; GFX8-NEXT: v_add_u32_e64 v4, s[0:1], 0, v0 4400; GFX8-NEXT: v_addc_u32_e64 v1, s[0:1], v0, v1, s[0:1] 4401; GFX8-NEXT: s_xor_b64 vcc, s[2:3], vcc 4402; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 4403; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 4404; GFX8-NEXT: ; return to shader part epilog 4405; 4406; GFX9-LABEL: ssubsat_i64_vs: 4407; GFX9: ; %bb.0: 4408; GFX9-NEXT: v_mov_b32_e32 v3, s1 4409; GFX9-NEXT: v_subrev_co_u32_e32 v2, vcc, s0, v0 4410; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc 4411; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[2:3], v[0:1] 4412; GFX9-NEXT: v_cmp_gt_i64_e64 s[2:3], s[0:1], 0 4413; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v3 4414; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 4415; GFX9-NEXT: v_add_co_u32_e64 v4, s[0:1], 0, v0 4416; GFX9-NEXT: v_addc_co_u32_e64 v1, s[0:1], v0, v1, s[0:1] 4417; GFX9-NEXT: s_xor_b64 vcc, s[2:3], vcc 4418; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 4419; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 4420; GFX9-NEXT: ; return to shader part epilog 4421; 4422; GFX10-LABEL: ssubsat_i64_vs: 
4423; GFX10: ; %bb.0: 4424; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v0, s0 4425; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v1, vcc_lo 4426; GFX10-NEXT: v_cmp_gt_i64_e64 s1, s[0:1], 0 4427; GFX10-NEXT: v_ashrrev_i32_e32 v4, 31, v3 4428; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[2:3], v[0:1] 4429; GFX10-NEXT: v_add_co_u32 v0, s0, v4, 0 4430; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0x80000000, v4, s0 4431; GFX10-NEXT: s_xor_b32 vcc_lo, s1, vcc_lo 4432; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo 4433; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo 4434; GFX10-NEXT: ; return to shader part epilog 4435 %result = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs) 4436 %cast = bitcast i64 %result to <2 x float> 4437 ret <2 x float> %cast 4438} 4439 4440define <2 x i64> @v_ssubsat_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { 4441; GFX6-LABEL: v_ssubsat_v2i64: 4442; GFX6: ; %bb.0: 4443; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4444; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v0, v4 4445; GFX6-NEXT: v_subb_u32_e32 v9, vcc, v1, v5, vcc 4446; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] 4447; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[4:5] 4448; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v9 4449; GFX6-NEXT: v_bfrev_b32_e32 v10, 1 4450; GFX6-NEXT: v_add_i32_e64 v1, s[6:7], 0, v0 4451; GFX6-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] 4452; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc 4453; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc 4454; GFX6-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc 4455; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v2, v6 4456; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v3, v7, vcc 4457; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] 4458; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[6:7] 4459; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v5 4460; GFX6-NEXT: v_add_i32_e64 v3, s[6:7], 0, v2 4461; GFX6-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] 4462; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc 4463; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc 4464; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc 
4465; GFX6-NEXT: s_setpc_b64 s[30:31] 4466; 4467; GFX8-LABEL: v_ssubsat_v2i64: 4468; GFX8: ; %bb.0: 4469; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4470; GFX8-NEXT: v_sub_u32_e32 v8, vcc, v0, v4 4471; GFX8-NEXT: v_subb_u32_e32 v9, vcc, v1, v5, vcc 4472; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] 4473; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[4:5] 4474; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v9 4475; GFX8-NEXT: v_bfrev_b32_e32 v10, 1 4476; GFX8-NEXT: v_add_u32_e64 v1, s[6:7], 0, v0 4477; GFX8-NEXT: v_addc_u32_e64 v4, s[6:7], v0, v10, s[6:7] 4478; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc 4479; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc 4480; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc 4481; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v2, v6 4482; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v3, v7, vcc 4483; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] 4484; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[6:7] 4485; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v5 4486; GFX8-NEXT: v_add_u32_e64 v3, s[6:7], 0, v2 4487; GFX8-NEXT: v_addc_u32_e64 v6, s[6:7], v2, v10, s[6:7] 4488; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc 4489; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc 4490; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc 4491; GFX8-NEXT: s_setpc_b64 s[30:31] 4492; 4493; GFX9-LABEL: v_ssubsat_v2i64: 4494; GFX9: ; %bb.0: 4495; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4496; GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, v0, v4 4497; GFX9-NEXT: v_subb_co_u32_e32 v9, vcc, v1, v5, vcc 4498; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[8:9], v[0:1] 4499; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[4:5] 4500; GFX9-NEXT: v_ashrrev_i32_e32 v0, 31, v9 4501; GFX9-NEXT: v_bfrev_b32_e32 v10, 1 4502; GFX9-NEXT: v_add_co_u32_e64 v1, s[6:7], 0, v0 4503; GFX9-NEXT: v_addc_co_u32_e64 v4, s[6:7], v0, v10, s[6:7] 4504; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc 4505; GFX9-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc 4506; GFX9-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc 4507; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v2, v6 4508; GFX9-NEXT: 
v_subb_co_u32_e32 v5, vcc, v3, v7, vcc 4509; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[2:3] 4510; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[6:7] 4511; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v5 4512; GFX9-NEXT: v_add_co_u32_e64 v3, s[6:7], 0, v2 4513; GFX9-NEXT: v_addc_co_u32_e64 v6, s[6:7], v2, v10, s[6:7] 4514; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc 4515; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc 4516; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v6, vcc 4517; GFX9-NEXT: s_setpc_b64 s[30:31] 4518; 4519; GFX10-LABEL: v_ssubsat_v2i64: 4520; GFX10: ; %bb.0: 4521; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4522; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4523; GFX10-NEXT: v_sub_co_u32 v8, vcc_lo, v0, v4 4524; GFX10-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, v1, v5, vcc_lo 4525; GFX10-NEXT: v_sub_co_u32 v10, vcc_lo, v2, v6 4526; GFX10-NEXT: v_sub_co_ci_u32_e32 v11, vcc_lo, v3, v7, vcc_lo 4527; GFX10-NEXT: v_ashrrev_i32_e32 v12, 31, v9 4528; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[0:1] 4529; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[4:5] 4530; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v11 4531; GFX10-NEXT: v_cmp_lt_i64_e64 s6, 0, v[6:7] 4532; GFX10-NEXT: v_add_co_u32 v1, s5, v12, 0 4533; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s5, 0x80000000, v12, s5 4534; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[10:11], v[2:3] 4535; GFX10-NEXT: v_add_co_u32 v2, s7, v0, 0 4536; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, 0x80000000, v0, s7 4537; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo 4538; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc_lo 4539; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc_lo 4540; GFX10-NEXT: s_xor_b32 vcc_lo, s6, s5 4541; GFX10-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc_lo 4542; GFX10-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc_lo 4543; GFX10-NEXT: s_setpc_b64 s[30:31] 4544 %result = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) 4545 ret <2 x i64> %result 4546} 4547 4548define amdgpu_ps <2 x i64> @s_ssubsat_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs) { 4549; 
GFX6-LABEL: s_ssubsat_v2i64: 4550; GFX6: ; %bb.0: 4551; GFX6-NEXT: s_sub_u32 s8, s0, s4 4552; GFX6-NEXT: s_cselect_b32 s9, 1, 0 4553; GFX6-NEXT: s_and_b32 s9, s9, 1 4554; GFX6-NEXT: s_cmp_lg_u32 s9, 0 4555; GFX6-NEXT: v_mov_b32_e32 v0, s0 4556; GFX6-NEXT: s_subb_u32 s9, s1, s5 4557; GFX6-NEXT: v_mov_b32_e32 v1, s1 4558; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, s[8:9], v[0:1] 4559; GFX6-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], 0 4560; GFX6-NEXT: s_ashr_i32 s4, s9, 31 4561; GFX6-NEXT: s_xor_b64 vcc, s[0:1], vcc 4562; GFX6-NEXT: s_add_u32 s0, s4, 0 4563; GFX6-NEXT: s_cselect_b32 s1, 1, 0 4564; GFX6-NEXT: s_and_b32 s1, s1, 1 4565; GFX6-NEXT: s_brev_b32 s5, 1 4566; GFX6-NEXT: s_cmp_lg_u32 s1, 0 4567; GFX6-NEXT: s_addc_u32 s1, s4, s5 4568; GFX6-NEXT: v_mov_b32_e32 v1, s0 4569; GFX6-NEXT: s_sub_u32 s0, s2, s6 4570; GFX6-NEXT: v_mov_b32_e32 v2, s1 4571; GFX6-NEXT: s_cselect_b32 s1, 1, 0 4572; GFX6-NEXT: v_mov_b32_e32 v0, s8 4573; GFX6-NEXT: s_and_b32 s1, s1, 1 4574; GFX6-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc 4575; GFX6-NEXT: s_cmp_lg_u32 s1, 0 4576; GFX6-NEXT: v_mov_b32_e32 v0, s2 4577; GFX6-NEXT: v_mov_b32_e32 v3, s9 4578; GFX6-NEXT: s_subb_u32 s1, s3, s7 4579; GFX6-NEXT: v_mov_b32_e32 v1, s3 4580; GFX6-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 4581; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1] 4582; GFX6-NEXT: v_cmp_gt_i64_e64 s[2:3], s[6:7], 0 4583; GFX6-NEXT: s_ashr_i32 s4, s1, 31 4584; GFX6-NEXT: s_xor_b64 vcc, s[2:3], vcc 4585; GFX6-NEXT: v_mov_b32_e32 v0, s0 4586; GFX6-NEXT: s_add_u32 s0, s4, 0 4587; GFX6-NEXT: s_cselect_b32 s2, 1, 0 4588; GFX6-NEXT: s_and_b32 s2, s2, 1 4589; GFX6-NEXT: s_cmp_lg_u32 s2, 0 4590; GFX6-NEXT: s_addc_u32 s3, s4, s5 4591; GFX6-NEXT: v_mov_b32_e32 v1, s0 4592; GFX6-NEXT: v_mov_b32_e32 v3, s3 4593; GFX6-NEXT: v_mov_b32_e32 v5, s1 4594; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 4595; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc 4596; GFX6-NEXT: v_readfirstlane_b32 s0, v4 4597; GFX6-NEXT: v_readfirstlane_b32 s1, v2 4598; GFX6-NEXT: 
v_readfirstlane_b32 s2, v0 4599; GFX6-NEXT: v_readfirstlane_b32 s3, v1 4600; GFX6-NEXT: ; return to shader part epilog 4601; 4602; GFX8-LABEL: s_ssubsat_v2i64: 4603; GFX8: ; %bb.0: 4604; GFX8-NEXT: s_sub_u32 s8, s0, s4 4605; GFX8-NEXT: s_cselect_b32 s9, 1, 0 4606; GFX8-NEXT: s_and_b32 s9, s9, 1 4607; GFX8-NEXT: s_cmp_lg_u32 s9, 0 4608; GFX8-NEXT: v_mov_b32_e32 v0, s0 4609; GFX8-NEXT: s_subb_u32 s9, s1, s5 4610; GFX8-NEXT: v_mov_b32_e32 v1, s1 4611; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[8:9], v[0:1] 4612; GFX8-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], 0 4613; GFX8-NEXT: s_ashr_i32 s4, s9, 31 4614; GFX8-NEXT: s_xor_b64 vcc, s[0:1], vcc 4615; GFX8-NEXT: s_add_u32 s0, s4, 0 4616; GFX8-NEXT: s_cselect_b32 s1, 1, 0 4617; GFX8-NEXT: s_and_b32 s1, s1, 1 4618; GFX8-NEXT: s_brev_b32 s5, 1 4619; GFX8-NEXT: s_cmp_lg_u32 s1, 0 4620; GFX8-NEXT: s_addc_u32 s1, s4, s5 4621; GFX8-NEXT: v_mov_b32_e32 v1, s0 4622; GFX8-NEXT: s_sub_u32 s0, s2, s6 4623; GFX8-NEXT: v_mov_b32_e32 v2, s1 4624; GFX8-NEXT: s_cselect_b32 s1, 1, 0 4625; GFX8-NEXT: v_mov_b32_e32 v0, s8 4626; GFX8-NEXT: s_and_b32 s1, s1, 1 4627; GFX8-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc 4628; GFX8-NEXT: s_cmp_lg_u32 s1, 0 4629; GFX8-NEXT: v_mov_b32_e32 v0, s2 4630; GFX8-NEXT: v_mov_b32_e32 v3, s9 4631; GFX8-NEXT: s_subb_u32 s1, s3, s7 4632; GFX8-NEXT: v_mov_b32_e32 v1, s3 4633; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 4634; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1] 4635; GFX8-NEXT: v_cmp_gt_i64_e64 s[2:3], s[6:7], 0 4636; GFX8-NEXT: s_ashr_i32 s4, s1, 31 4637; GFX8-NEXT: s_xor_b64 vcc, s[2:3], vcc 4638; GFX8-NEXT: v_mov_b32_e32 v0, s0 4639; GFX8-NEXT: s_add_u32 s0, s4, 0 4640; GFX8-NEXT: s_cselect_b32 s2, 1, 0 4641; GFX8-NEXT: s_and_b32 s2, s2, 1 4642; GFX8-NEXT: s_cmp_lg_u32 s2, 0 4643; GFX8-NEXT: s_addc_u32 s3, s4, s5 4644; GFX8-NEXT: v_mov_b32_e32 v1, s0 4645; GFX8-NEXT: v_mov_b32_e32 v3, s3 4646; GFX8-NEXT: v_mov_b32_e32 v5, s1 4647; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 4648; GFX8-NEXT: v_cndmask_b32_e32 v1, 
v5, v3, vcc 4649; GFX8-NEXT: v_readfirstlane_b32 s0, v4 4650; GFX8-NEXT: v_readfirstlane_b32 s1, v2 4651; GFX8-NEXT: v_readfirstlane_b32 s2, v0 4652; GFX8-NEXT: v_readfirstlane_b32 s3, v1 4653; GFX8-NEXT: ; return to shader part epilog 4654; 4655; GFX9-LABEL: s_ssubsat_v2i64: 4656; GFX9: ; %bb.0: 4657; GFX9-NEXT: s_sub_u32 s8, s0, s4 4658; GFX9-NEXT: s_cselect_b32 s9, 1, 0 4659; GFX9-NEXT: s_and_b32 s9, s9, 1 4660; GFX9-NEXT: s_cmp_lg_u32 s9, 0 4661; GFX9-NEXT: v_mov_b32_e32 v0, s0 4662; GFX9-NEXT: s_subb_u32 s9, s1, s5 4663; GFX9-NEXT: v_mov_b32_e32 v1, s1 4664; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[8:9], v[0:1] 4665; GFX9-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], 0 4666; GFX9-NEXT: s_ashr_i32 s4, s9, 31 4667; GFX9-NEXT: s_xor_b64 vcc, s[0:1], vcc 4668; GFX9-NEXT: s_add_u32 s0, s4, 0 4669; GFX9-NEXT: s_cselect_b32 s1, 1, 0 4670; GFX9-NEXT: s_and_b32 s1, s1, 1 4671; GFX9-NEXT: s_brev_b32 s5, 1 4672; GFX9-NEXT: s_cmp_lg_u32 s1, 0 4673; GFX9-NEXT: s_addc_u32 s1, s4, s5 4674; GFX9-NEXT: v_mov_b32_e32 v1, s0 4675; GFX9-NEXT: s_sub_u32 s0, s2, s6 4676; GFX9-NEXT: v_mov_b32_e32 v2, s1 4677; GFX9-NEXT: s_cselect_b32 s1, 1, 0 4678; GFX9-NEXT: v_mov_b32_e32 v0, s8 4679; GFX9-NEXT: s_and_b32 s1, s1, 1 4680; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc 4681; GFX9-NEXT: s_cmp_lg_u32 s1, 0 4682; GFX9-NEXT: v_mov_b32_e32 v0, s2 4683; GFX9-NEXT: v_mov_b32_e32 v3, s9 4684; GFX9-NEXT: s_subb_u32 s1, s3, s7 4685; GFX9-NEXT: v_mov_b32_e32 v1, s3 4686; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 4687; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1] 4688; GFX9-NEXT: v_cmp_gt_i64_e64 s[2:3], s[6:7], 0 4689; GFX9-NEXT: s_ashr_i32 s4, s1, 31 4690; GFX9-NEXT: s_xor_b64 vcc, s[2:3], vcc 4691; GFX9-NEXT: v_mov_b32_e32 v0, s0 4692; GFX9-NEXT: s_add_u32 s0, s4, 0 4693; GFX9-NEXT: s_cselect_b32 s2, 1, 0 4694; GFX9-NEXT: s_and_b32 s2, s2, 1 4695; GFX9-NEXT: s_cmp_lg_u32 s2, 0 4696; GFX9-NEXT: s_addc_u32 s3, s4, s5 4697; GFX9-NEXT: v_mov_b32_e32 v1, s0 4698; GFX9-NEXT: v_mov_b32_e32 v3, s3 4699; 
GFX9-NEXT: v_mov_b32_e32 v5, s1 4700; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 4701; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc 4702; GFX9-NEXT: v_readfirstlane_b32 s0, v4 4703; GFX9-NEXT: v_readfirstlane_b32 s1, v2 4704; GFX9-NEXT: v_readfirstlane_b32 s2, v0 4705; GFX9-NEXT: v_readfirstlane_b32 s3, v1 4706; GFX9-NEXT: ; return to shader part epilog 4707; 4708; GFX10-LABEL: s_ssubsat_v2i64: 4709; GFX10: ; %bb.0: 4710; GFX10-NEXT: s_sub_u32 s8, s0, s4 4711; GFX10-NEXT: s_cselect_b32 s9, 1, 0 4712; GFX10-NEXT: v_cmp_gt_i64_e64 s4, s[4:5], 0 4713; GFX10-NEXT: s_and_b32 s9, s9, 1 4714; GFX10-NEXT: v_mov_b32_e32 v0, s8 4715; GFX10-NEXT: s_cmp_lg_u32 s9, 0 4716; GFX10-NEXT: s_brev_b32 s10, 1 4717; GFX10-NEXT: s_subb_u32 s9, s1, s5 4718; GFX10-NEXT: v_cmp_lt_i64_e64 s0, s[8:9], s[0:1] 4719; GFX10-NEXT: s_ashr_i32 s1, s9, 31 4720; GFX10-NEXT: v_mov_b32_e32 v1, s9 4721; GFX10-NEXT: s_xor_b32 s8, s4, s0 4722; GFX10-NEXT: s_add_u32 s0, s1, 0 4723; GFX10-NEXT: s_cselect_b32 s4, 1, 0 4724; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s0, s8 4725; GFX10-NEXT: s_and_b32 s4, s4, 1 4726; GFX10-NEXT: s_cmp_lg_u32 s4, 0 4727; GFX10-NEXT: s_addc_u32 s1, s1, s10 4728; GFX10-NEXT: s_sub_u32 s4, s2, s6 4729; GFX10-NEXT: s_cselect_b32 s5, 1, 0 4730; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s1, s8 4731; GFX10-NEXT: s_and_b32 s5, s5, 1 4732; GFX10-NEXT: v_mov_b32_e32 v2, s4 4733; GFX10-NEXT: s_cmp_lg_u32 s5, 0 4734; GFX10-NEXT: s_subb_u32 s5, s3, s7 4735; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[4:5], s[2:3] 4736; GFX10-NEXT: v_cmp_gt_i64_e64 s3, s[6:7], 0 4737; GFX10-NEXT: s_ashr_i32 s1, s5, 31 4738; GFX10-NEXT: v_mov_b32_e32 v3, s5 4739; GFX10-NEXT: s_xor_b32 s2, s3, s2 4740; GFX10-NEXT: s_add_u32 s0, s1, 0 4741; GFX10-NEXT: s_cselect_b32 s3, 1, 0 4742; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s0, s2 4743; GFX10-NEXT: s_and_b32 s3, s3, 1 4744; GFX10-NEXT: v_readfirstlane_b32 s0, v0 4745; GFX10-NEXT: s_cmp_lg_u32 s3, 0 4746; GFX10-NEXT: s_addc_u32 s1, s1, s10 4747; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, 
s1, s2 4748; GFX10-NEXT: v_readfirstlane_b32 s1, v1 4749; GFX10-NEXT: v_readfirstlane_b32 s2, v2 4750; GFX10-NEXT: v_readfirstlane_b32 s3, v3 4751; GFX10-NEXT: ; return to shader part epilog 4752 %result = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) 4753 ret <2 x i64> %result 4754} 4755 4756define amdgpu_ps i128 @s_ssubsat_i128(i128 inreg %lhs, i128 inreg %rhs) { 4757; GFX6-LABEL: s_ssubsat_i128: 4758; GFX6: ; %bb.0: 4759; GFX6-NEXT: s_sub_u32 s8, s0, s4 4760; GFX6-NEXT: s_cselect_b32 s9, 1, 0 4761; GFX6-NEXT: s_and_b32 s9, s9, 1 4762; GFX6-NEXT: s_cmp_lg_u32 s9, 0 4763; GFX6-NEXT: s_subb_u32 s9, s1, s5 4764; GFX6-NEXT: s_cselect_b32 s10, 1, 0 4765; GFX6-NEXT: s_and_b32 s10, s10, 1 4766; GFX6-NEXT: s_cmp_lg_u32 s10, 0 4767; GFX6-NEXT: s_subb_u32 s10, s2, s6 4768; GFX6-NEXT: s_cselect_b32 s11, 1, 0 4769; GFX6-NEXT: v_mov_b32_e32 v3, s1 4770; GFX6-NEXT: s_and_b32 s11, s11, 1 4771; GFX6-NEXT: v_mov_b32_e32 v2, s0 4772; GFX6-NEXT: s_cmp_lg_u32 s11, 0 4773; GFX6-NEXT: v_mov_b32_e32 v0, s2 4774; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, s[8:9], v[2:3] 4775; GFX6-NEXT: s_subb_u32 s11, s3, s7 4776; GFX6-NEXT: v_mov_b32_e32 v1, s3 4777; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 4778; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, s[10:11], v[0:1] 4779; GFX6-NEXT: v_cmp_gt_u64_e64 s[0:1], s[4:5], 0 4780; GFX6-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 4781; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] 4782; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] 4783; GFX6-NEXT: v_cmp_gt_i64_e64 s[0:1], s[6:7], 0 4784; GFX6-NEXT: s_ashr_i32 s3, s11, 31 4785; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 4786; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] 4787; GFX6-NEXT: s_add_u32 s0, s3, 0 4788; GFX6-NEXT: s_cselect_b32 s1, 1, 0 4789; GFX6-NEXT: s_and_b32 s1, s1, 1 4790; GFX6-NEXT: s_cmp_lg_u32 s1, 0 4791; GFX6-NEXT: s_addc_u32 s1, s3, 0 4792; GFX6-NEXT: s_cselect_b32 s2, 1, 0 4793; GFX6-NEXT: s_and_b32 s2, s2, 1 4794; GFX6-NEXT: v_cmp_eq_u64_e64 vcc, s[6:7], 0 4795; GFX6-NEXT: 
s_cmp_lg_u32 s2, 0 4796; GFX6-NEXT: s_addc_u32 s2, s3, 0 4797; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 4798; GFX6-NEXT: s_cselect_b32 s4, 1, 0 4799; GFX6-NEXT: v_xor_b32_e32 v0, v1, v0 4800; GFX6-NEXT: s_and_b32 s4, s4, 1 4801; GFX6-NEXT: s_cmp_lg_u32 s4, 0 4802; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 4803; GFX6-NEXT: s_addc_u32 s3, s3, 0x80000000 4804; GFX6-NEXT: v_mov_b32_e32 v1, s0 4805; GFX6-NEXT: v_mov_b32_e32 v2, s1 4806; GFX6-NEXT: v_mov_b32_e32 v3, s8 4807; GFX6-NEXT: v_mov_b32_e32 v4, s9 4808; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 4809; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 4810; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc 4811; GFX6-NEXT: v_mov_b32_e32 v2, s2 4812; GFX6-NEXT: v_mov_b32_e32 v3, s3 4813; GFX6-NEXT: v_mov_b32_e32 v4, s10 4814; GFX6-NEXT: v_mov_b32_e32 v5, s11 4815; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 4816; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc 4817; GFX6-NEXT: v_readfirstlane_b32 s0, v0 4818; GFX6-NEXT: v_readfirstlane_b32 s1, v1 4819; GFX6-NEXT: v_readfirstlane_b32 s2, v2 4820; GFX6-NEXT: v_readfirstlane_b32 s3, v3 4821; GFX6-NEXT: ; return to shader part epilog 4822; 4823; GFX8-LABEL: s_ssubsat_i128: 4824; GFX8: ; %bb.0: 4825; GFX8-NEXT: s_sub_u32 s8, s0, s4 4826; GFX8-NEXT: s_cselect_b32 s9, 1, 0 4827; GFX8-NEXT: s_and_b32 s9, s9, 1 4828; GFX8-NEXT: s_cmp_lg_u32 s9, 0 4829; GFX8-NEXT: s_subb_u32 s9, s1, s5 4830; GFX8-NEXT: s_cselect_b32 s10, 1, 0 4831; GFX8-NEXT: s_and_b32 s10, s10, 1 4832; GFX8-NEXT: s_cmp_lg_u32 s10, 0 4833; GFX8-NEXT: s_subb_u32 s10, s2, s6 4834; GFX8-NEXT: s_cselect_b32 s11, 1, 0 4835; GFX8-NEXT: s_and_b32 s11, s11, 1 4836; GFX8-NEXT: v_mov_b32_e32 v3, s1 4837; GFX8-NEXT: s_cmp_lg_u32 s11, 0 4838; GFX8-NEXT: v_mov_b32_e32 v2, s0 4839; GFX8-NEXT: s_subb_u32 s11, s3, s7 4840; GFX8-NEXT: v_mov_b32_e32 v0, s2 4841; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, s[8:9], v[2:3] 4842; GFX8-NEXT: v_mov_b32_e32 v1, s3 4843; GFX8-NEXT: s_cmp_eq_u64 s[10:11], s[2:3] 4844; GFX8-NEXT: s_cselect_b32 s2, 1, 0 4845; 
GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 4846; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[10:11], v[0:1] 4847; GFX8-NEXT: s_and_b32 s0, 1, s2 4848; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 4849; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 4850; GFX8-NEXT: v_cmp_gt_u64_e64 s[0:1], s[4:5], 0 4851; GFX8-NEXT: s_cmp_eq_u64 s[6:7], 0 4852; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] 4853; GFX8-NEXT: v_cmp_gt_i64_e64 s[0:1], s[6:7], 0 4854; GFX8-NEXT: s_cselect_b32 s2, 1, 0 4855; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 4856; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] 4857; GFX8-NEXT: s_and_b32 s0, 1, s2 4858; GFX8-NEXT: s_ashr_i32 s3, s11, 31 4859; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 4860; GFX8-NEXT: s_add_u32 s0, s3, 0 4861; GFX8-NEXT: s_cselect_b32 s1, 1, 0 4862; GFX8-NEXT: s_and_b32 s1, s1, 1 4863; GFX8-NEXT: s_cmp_lg_u32 s1, 0 4864; GFX8-NEXT: s_addc_u32 s1, s3, 0 4865; GFX8-NEXT: s_cselect_b32 s2, 1, 0 4866; GFX8-NEXT: s_and_b32 s2, s2, 1 4867; GFX8-NEXT: s_cmp_lg_u32 s2, 0 4868; GFX8-NEXT: s_addc_u32 s2, s3, 0 4869; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 4870; GFX8-NEXT: s_cselect_b32 s4, 1, 0 4871; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0 4872; GFX8-NEXT: s_and_b32 s4, s4, 1 4873; GFX8-NEXT: s_cmp_lg_u32 s4, 0 4874; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 4875; GFX8-NEXT: s_addc_u32 s3, s3, 0x80000000 4876; GFX8-NEXT: v_mov_b32_e32 v1, s0 4877; GFX8-NEXT: v_mov_b32_e32 v2, s1 4878; GFX8-NEXT: v_mov_b32_e32 v3, s8 4879; GFX8-NEXT: v_mov_b32_e32 v4, s9 4880; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 4881; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 4882; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc 4883; GFX8-NEXT: v_mov_b32_e32 v2, s2 4884; GFX8-NEXT: v_mov_b32_e32 v3, s3 4885; GFX8-NEXT: v_mov_b32_e32 v4, s10 4886; GFX8-NEXT: v_mov_b32_e32 v5, s11 4887; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 4888; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc 4889; GFX8-NEXT: v_readfirstlane_b32 s0, v0 4890; GFX8-NEXT: v_readfirstlane_b32 s1, v1 4891; GFX8-NEXT: 
v_readfirstlane_b32 s2, v2 4892; GFX8-NEXT: v_readfirstlane_b32 s3, v3 4893; GFX8-NEXT: ; return to shader part epilog 4894; 4895; GFX9-LABEL: s_ssubsat_i128: 4896; GFX9: ; %bb.0: 4897; GFX9-NEXT: s_sub_u32 s8, s0, s4 4898; GFX9-NEXT: s_cselect_b32 s9, 1, 0 4899; GFX9-NEXT: s_and_b32 s9, s9, 1 4900; GFX9-NEXT: s_cmp_lg_u32 s9, 0 4901; GFX9-NEXT: s_subb_u32 s9, s1, s5 4902; GFX9-NEXT: s_cselect_b32 s10, 1, 0 4903; GFX9-NEXT: s_and_b32 s10, s10, 1 4904; GFX9-NEXT: s_cmp_lg_u32 s10, 0 4905; GFX9-NEXT: s_subb_u32 s10, s2, s6 4906; GFX9-NEXT: s_cselect_b32 s11, 1, 0 4907; GFX9-NEXT: s_and_b32 s11, s11, 1 4908; GFX9-NEXT: v_mov_b32_e32 v3, s1 4909; GFX9-NEXT: s_cmp_lg_u32 s11, 0 4910; GFX9-NEXT: v_mov_b32_e32 v2, s0 4911; GFX9-NEXT: s_subb_u32 s11, s3, s7 4912; GFX9-NEXT: v_mov_b32_e32 v0, s2 4913; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[8:9], v[2:3] 4914; GFX9-NEXT: v_mov_b32_e32 v1, s3 4915; GFX9-NEXT: s_cmp_eq_u64 s[10:11], s[2:3] 4916; GFX9-NEXT: s_cselect_b32 s2, 1, 0 4917; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 4918; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[10:11], v[0:1] 4919; GFX9-NEXT: s_and_b32 s0, 1, s2 4920; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 4921; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 4922; GFX9-NEXT: v_cmp_gt_u64_e64 s[0:1], s[4:5], 0 4923; GFX9-NEXT: s_cmp_eq_u64 s[6:7], 0 4924; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] 4925; GFX9-NEXT: v_cmp_gt_i64_e64 s[0:1], s[6:7], 0 4926; GFX9-NEXT: s_cselect_b32 s2, 1, 0 4927; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 4928; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] 4929; GFX9-NEXT: s_and_b32 s0, 1, s2 4930; GFX9-NEXT: s_ashr_i32 s3, s11, 31 4931; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 4932; GFX9-NEXT: s_add_u32 s0, s3, 0 4933; GFX9-NEXT: s_cselect_b32 s1, 1, 0 4934; GFX9-NEXT: s_and_b32 s1, s1, 1 4935; GFX9-NEXT: s_cmp_lg_u32 s1, 0 4936; GFX9-NEXT: s_addc_u32 s1, s3, 0 4937; GFX9-NEXT: s_cselect_b32 s2, 1, 0 4938; GFX9-NEXT: s_and_b32 s2, s2, 1 4939; GFX9-NEXT: s_cmp_lg_u32 s2, 0 4940; GFX9-NEXT: 
s_addc_u32 s2, s3, 0 4941; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 4942; GFX9-NEXT: s_cselect_b32 s4, 1, 0 4943; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0 4944; GFX9-NEXT: s_and_b32 s4, s4, 1 4945; GFX9-NEXT: s_cmp_lg_u32 s4, 0 4946; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 4947; GFX9-NEXT: s_addc_u32 s3, s3, 0x80000000 4948; GFX9-NEXT: v_mov_b32_e32 v1, s0 4949; GFX9-NEXT: v_mov_b32_e32 v2, s1 4950; GFX9-NEXT: v_mov_b32_e32 v3, s8 4951; GFX9-NEXT: v_mov_b32_e32 v4, s9 4952; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 4953; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 4954; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc 4955; GFX9-NEXT: v_mov_b32_e32 v2, s2 4956; GFX9-NEXT: v_mov_b32_e32 v3, s3 4957; GFX9-NEXT: v_mov_b32_e32 v4, s10 4958; GFX9-NEXT: v_mov_b32_e32 v5, s11 4959; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 4960; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc 4961; GFX9-NEXT: v_readfirstlane_b32 s0, v0 4962; GFX9-NEXT: v_readfirstlane_b32 s1, v1 4963; GFX9-NEXT: v_readfirstlane_b32 s2, v2 4964; GFX9-NEXT: v_readfirstlane_b32 s3, v3 4965; GFX9-NEXT: ; return to shader part epilog 4966; 4967; GFX10-LABEL: s_ssubsat_i128: 4968; GFX10: ; %bb.0: 4969; GFX10-NEXT: s_sub_u32 s8, s0, s4 4970; GFX10-NEXT: s_cselect_b32 s9, 1, 0 4971; GFX10-NEXT: s_and_b32 s9, s9, 1 4972; GFX10-NEXT: s_cmp_lg_u32 s9, 0 4973; GFX10-NEXT: s_subb_u32 s9, s1, s5 4974; GFX10-NEXT: s_cselect_b32 s10, 1, 0 4975; GFX10-NEXT: v_cmp_lt_u64_e64 s0, s[8:9], s[0:1] 4976; GFX10-NEXT: s_and_b32 s10, s10, 1 4977; GFX10-NEXT: s_cmp_lg_u32 s10, 0 4978; GFX10-NEXT: s_subb_u32 s10, s2, s6 4979; GFX10-NEXT: s_cselect_b32 s11, 1, 0 4980; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 4981; GFX10-NEXT: s_and_b32 s11, s11, 1 4982; GFX10-NEXT: v_mov_b32_e32 v3, s10 4983; GFX10-NEXT: s_cmp_lg_u32 s11, 0 4984; GFX10-NEXT: s_subb_u32 s11, s3, s7 4985; GFX10-NEXT: s_cmp_eq_u64 s[10:11], s[2:3] 4986; GFX10-NEXT: v_cmp_lt_i64_e64 s1, s[10:11], s[2:3] 4987; GFX10-NEXT: s_cselect_b32 s0, 1, 0 4988; GFX10-NEXT: v_mov_b32_e32 v4, 
s11 4989; GFX10-NEXT: s_and_b32 s0, 1, s0 4990; GFX10-NEXT: s_cmp_eq_u64 s[6:7], 0 4991; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 4992; GFX10-NEXT: v_cmp_gt_u64_e64 s0, s[4:5], 0 4993; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s1 4994; GFX10-NEXT: s_cselect_b32 s1, 1, 0 4995; GFX10-NEXT: s_ashr_i32 s3, s11, 31 4996; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo 4997; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 4998; GFX10-NEXT: v_cmp_gt_i64_e64 s0, s[6:7], 0 4999; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 5000; GFX10-NEXT: s_and_b32 s0, 1, s1 5001; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 5002; GFX10-NEXT: s_add_u32 s0, s3, 0 5003; GFX10-NEXT: s_cselect_b32 s1, 1, 0 5004; GFX10-NEXT: s_and_b32 s1, s1, 1 5005; GFX10-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo 5006; GFX10-NEXT: s_cmp_lg_u32 s1, 0 5007; GFX10-NEXT: v_mov_b32_e32 v2, s9 5008; GFX10-NEXT: s_addc_u32 s1, s3, 0 5009; GFX10-NEXT: s_cselect_b32 s2, 1, 0 5010; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0 5011; GFX10-NEXT: s_and_b32 s2, s2, 1 5012; GFX10-NEXT: v_mov_b32_e32 v1, s8 5013; GFX10-NEXT: s_cmp_lg_u32 s2, 0 5014; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 5015; GFX10-NEXT: s_addc_u32 s2, s3, 0 5016; GFX10-NEXT: s_cselect_b32 s4, 1, 0 5017; GFX10-NEXT: s_and_b32 s4, s4, 1 5018; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 5019; GFX10-NEXT: s_cmp_lg_u32 s4, 0 5020; GFX10-NEXT: s_addc_u32 s3, s3, 0x80000000 5021; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo 5022; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s1, vcc_lo 5023; GFX10-NEXT: v_cndmask_b32_e64 v2, v3, s2, vcc_lo 5024; GFX10-NEXT: v_cndmask_b32_e64 v3, v4, s3, vcc_lo 5025; GFX10-NEXT: v_readfirstlane_b32 s0, v0 5026; GFX10-NEXT: v_readfirstlane_b32 s1, v1 5027; GFX10-NEXT: v_readfirstlane_b32 s2, v2 5028; GFX10-NEXT: v_readfirstlane_b32 s3, v3 5029; GFX10-NEXT: ; return to shader part epilog 5030 %result = call i128 @llvm.ssub.sat.i128(i128 %lhs, i128 %rhs) 5031 ret i128 %result 5032} 5033 5034define amdgpu_ps <4 x float> @ssubsat_i128_sv(i128 inreg %lhs, 
i128 %rhs) { 5035; GFX6-LABEL: ssubsat_i128_sv: 5036; GFX6: ; %bb.0: 5037; GFX6-NEXT: v_mov_b32_e32 v5, s1 5038; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s0, v0 5039; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v5, v1, vcc 5040; GFX6-NEXT: v_mov_b32_e32 v6, s2 5041; GFX6-NEXT: v_mov_b32_e32 v7, s3 5042; GFX6-NEXT: v_subb_u32_e32 v6, vcc, v6, v2, vcc 5043; GFX6-NEXT: v_subb_u32_e32 v7, vcc, v7, v3, vcc 5044; GFX6-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[4:5] 5045; GFX6-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 5046; GFX6-NEXT: v_cmp_gt_i64_e32 vcc, s[2:3], v[6:7] 5047; GFX6-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 5048; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, s[2:3], v[6:7] 5049; GFX6-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc 5050; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1] 5051; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 5052; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[2:3] 5053; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 5054; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] 5055; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 5056; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v7 5057; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0, v1 5058; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc 5059; GFX6-NEXT: v_xor_b32_e32 v0, v0, v8 5060; GFX6-NEXT: v_bfrev_b32_e32 v8, 1 5061; GFX6-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 5062; GFX6-NEXT: v_addc_u32_e32 v8, vcc, v1, v8, vcc 5063; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 5064; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 5065; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc 5066; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc 5067; GFX6-NEXT: v_cndmask_b32_e32 v2, v6, v9, vcc 5068; GFX6-NEXT: v_cndmask_b32_e32 v3, v7, v8, vcc 5069; GFX6-NEXT: ; return to shader part epilog 5070; 5071; GFX8-LABEL: ssubsat_i128_sv: 5072; GFX8: ; %bb.0: 5073; GFX8-NEXT: v_mov_b32_e32 v5, s1 5074; GFX8-NEXT: v_sub_u32_e32 v4, vcc, s0, v0 5075; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v5, v1, vcc 5076; GFX8-NEXT: v_mov_b32_e32 v6, s2 5077; GFX8-NEXT: v_mov_b32_e32 v7, s3 5078; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v6, v2, vcc 5079; 
GFX8-NEXT: v_subb_u32_e32 v7, vcc, v7, v3, vcc 5080; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[4:5] 5081; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 5082; GFX8-NEXT: v_cmp_gt_i64_e32 vcc, s[2:3], v[6:7] 5083; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 5084; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, s[2:3], v[6:7] 5085; GFX8-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc 5086; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1] 5087; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 5088; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[2:3] 5089; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 5090; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] 5091; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 5092; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v7 5093; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0, v1 5094; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc 5095; GFX8-NEXT: v_xor_b32_e32 v0, v0, v8 5096; GFX8-NEXT: v_bfrev_b32_e32 v8, 1 5097; GFX8-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 5098; GFX8-NEXT: v_addc_u32_e32 v8, vcc, v1, v8, vcc 5099; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 5100; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 5101; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc 5102; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc 5103; GFX8-NEXT: v_cndmask_b32_e32 v2, v6, v9, vcc 5104; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v8, vcc 5105; GFX8-NEXT: ; return to shader part epilog 5106; 5107; GFX9-LABEL: ssubsat_i128_sv: 5108; GFX9: ; %bb.0: 5109; GFX9-NEXT: v_mov_b32_e32 v5, s1 5110; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, s0, v0 5111; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v1, vcc 5112; GFX9-NEXT: v_mov_b32_e32 v6, s2 5113; GFX9-NEXT: v_mov_b32_e32 v7, s3 5114; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v2, vcc 5115; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v3, vcc 5116; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[4:5] 5117; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 5118; GFX9-NEXT: v_cmp_gt_i64_e32 vcc, s[2:3], v[6:7] 5119; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc 5120; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, s[2:3], v[6:7] 5121; GFX9-NEXT: 
v_cndmask_b32_e32 v8, v9, v8, vcc 5122; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1] 5123; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 5124; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[2:3] 5125; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 5126; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] 5127; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 5128; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v7 5129; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 0, v1 5130; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc 5131; GFX9-NEXT: v_xor_b32_e32 v0, v0, v8 5132; GFX9-NEXT: v_bfrev_b32_e32 v8, 1 5133; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v1, vcc 5134; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v1, v8, vcc 5135; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 5136; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 5137; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc 5138; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc 5139; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v9, vcc 5140; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v8, vcc 5141; GFX9-NEXT: ; return to shader part epilog 5142; 5143; GFX10-LABEL: ssubsat_i128_sv: 5144; GFX10: ; %bb.0: 5145; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, s0, v0 5146; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, s1, v1, vcc_lo 5147; GFX10-NEXT: v_sub_co_ci_u32_e32 v6, vcc_lo, s2, v2, vcc_lo 5148; GFX10-NEXT: v_sub_co_ci_u32_e32 v7, vcc_lo, s3, v3, vcc_lo 5149; GFX10-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[4:5] 5150; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo 5151; GFX10-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[2:3], v[6:7] 5152; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo 5153; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, 0, v[0:1] 5154; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 5155; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, 0, v[2:3] 5156; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 5157; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[6:7] 5158; GFX10-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc_lo 5159; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[2:3] 5160; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo 5161; GFX10-NEXT: 
v_ashrrev_i32_e32 v1, 31, v7 5162; GFX10-NEXT: v_xor_b32_e32 v0, v0, v8 5163; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v1, 0 5164; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo 5165; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 5166; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v1, vcc_lo 5167; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0x80000000, v1, vcc_lo 5168; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 5169; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, v2, s0 5170; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, v3, s0 5171; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, v8, s0 5172; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v9, s0 5173; GFX10-NEXT: ; return to shader part epilog 5174 %result = call i128 @llvm.ssub.sat.i128(i128 %lhs, i128 %rhs) 5175 %cast = bitcast i128 %result to <4 x float> 5176 ret <4 x float> %cast 5177} 5178 5179define amdgpu_ps <4 x float> @ssubsat_i128_vs(i128 %lhs, i128 inreg %rhs) { 5180; GFX6-LABEL: ssubsat_i128_vs: 5181; GFX6: ; %bb.0: 5182; GFX6-NEXT: v_mov_b32_e32 v5, s1 5183; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, s0, v0 5184; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v1, v5, vcc 5185; GFX6-NEXT: v_mov_b32_e32 v6, s2 5186; GFX6-NEXT: v_mov_b32_e32 v7, s3 5187; GFX6-NEXT: v_subb_u32_e32 v6, vcc, v2, v6, vcc 5188; GFX6-NEXT: v_subb_u32_e32 v7, vcc, v3, v7, vcc 5189; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, v[4:5], v[0:1] 5190; GFX6-NEXT: v_cmp_gt_u64_e64 s[0:1], s[0:1], 0 5191; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 5192; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[6:7], v[2:3] 5193; GFX6-NEXT: v_bfrev_b32_e32 v8, 1 5194; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 5195; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3] 5196; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 5197; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] 5198; GFX6-NEXT: v_cmp_gt_i64_e64 s[0:1], s[2:3], 0 5199; GFX6-NEXT: v_cmp_eq_u64_e64 vcc, s[2:3], 0 5200; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] 5201; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 5202; GFX6-NEXT: v_xor_b32_e32 v0, v1, v0 5203; GFX6-NEXT: 
v_ashrrev_i32_e32 v1, 31, v7 5204; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0, v1 5205; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc 5206; GFX6-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 5207; GFX6-NEXT: v_addc_u32_e32 v8, vcc, v1, v8, vcc 5208; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 5209; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 5210; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc 5211; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc 5212; GFX6-NEXT: v_cndmask_b32_e32 v2, v6, v9, vcc 5213; GFX6-NEXT: v_cndmask_b32_e32 v3, v7, v8, vcc 5214; GFX6-NEXT: ; return to shader part epilog 5215; 5216; GFX8-LABEL: ssubsat_i128_vs: 5217; GFX8: ; %bb.0: 5218; GFX8-NEXT: v_mov_b32_e32 v5, s1 5219; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, s0, v0 5220; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v1, v5, vcc 5221; GFX8-NEXT: v_mov_b32_e32 v6, s2 5222; GFX8-NEXT: v_mov_b32_e32 v7, s3 5223; GFX8-NEXT: v_subb_u32_e32 v6, vcc, v2, v6, vcc 5224; GFX8-NEXT: v_subb_u32_e32 v7, vcc, v3, v7, vcc 5225; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, v[4:5], v[0:1] 5226; GFX8-NEXT: v_cmp_gt_u64_e64 s[0:1], s[0:1], 0 5227; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 5228; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[6:7], v[2:3] 5229; GFX8-NEXT: s_cmp_eq_u64 s[2:3], 0 5230; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 5231; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3] 5232; GFX8-NEXT: s_cselect_b32 s4, 1, 0 5233; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 5234; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] 5235; GFX8-NEXT: v_cmp_gt_i64_e64 s[0:1], s[2:3], 0 5236; GFX8-NEXT: v_bfrev_b32_e32 v8, 1 5237; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] 5238; GFX8-NEXT: s_and_b32 s0, 1, s4 5239; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 5240; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 5241; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0 5242; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v7 5243; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0, v1 5244; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc 5245; GFX8-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc 5246; GFX8-NEXT: v_addc_u32_e32 v8, 
vcc, v1, v8, vcc 5247; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 5248; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 5249; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc 5250; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc 5251; GFX8-NEXT: v_cndmask_b32_e32 v2, v6, v9, vcc 5252; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v8, vcc 5253; GFX8-NEXT: ; return to shader part epilog 5254; 5255; GFX9-LABEL: ssubsat_i128_vs: 5256; GFX9: ; %bb.0: 5257; GFX9-NEXT: v_mov_b32_e32 v5, s1 5258; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s0, v0 5259; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v5, vcc 5260; GFX9-NEXT: v_mov_b32_e32 v6, s2 5261; GFX9-NEXT: v_mov_b32_e32 v7, s3 5262; GFX9-NEXT: v_subb_co_u32_e32 v6, vcc, v2, v6, vcc 5263; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v3, v7, vcc 5264; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[4:5], v[0:1] 5265; GFX9-NEXT: v_cmp_gt_u64_e64 s[0:1], s[0:1], 0 5266; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 5267; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[6:7], v[2:3] 5268; GFX9-NEXT: s_cmp_eq_u64 s[2:3], 0 5269; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 5270; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3] 5271; GFX9-NEXT: s_cselect_b32 s4, 1, 0 5272; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 5273; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] 5274; GFX9-NEXT: v_cmp_gt_i64_e64 s[0:1], s[2:3], 0 5275; GFX9-NEXT: v_bfrev_b32_e32 v8, 1 5276; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] 5277; GFX9-NEXT: s_and_b32 s0, 1, s4 5278; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 5279; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 5280; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0 5281; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v7 5282; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 0, v1 5283; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc 5284; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v1, vcc 5285; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v1, v8, vcc 5286; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 5287; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 5288; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc 5289; GFX9-NEXT: v_cndmask_b32_e32 
v1, v5, v3, vcc 5290; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v9, vcc 5291; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v8, vcc 5292; GFX9-NEXT: ; return to shader part epilog 5293; 5294; GFX10-LABEL: ssubsat_i128_vs: 5295; GFX10: ; %bb.0: 5296; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, s0 5297; GFX10-NEXT: v_subrev_co_ci_u32_e32 v5, vcc_lo, s1, v1, vcc_lo 5298; GFX10-NEXT: v_subrev_co_ci_u32_e32 v6, vcc_lo, s2, v2, vcc_lo 5299; GFX10-NEXT: v_subrev_co_ci_u32_e32 v7, vcc_lo, s3, v3, vcc_lo 5300; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[4:5], v[0:1] 5301; GFX10-NEXT: v_cmp_gt_u64_e64 s0, s[0:1], 0 5302; GFX10-NEXT: s_cmp_eq_u64 s[2:3], 0 5303; GFX10-NEXT: s_cselect_b32 s4, 1, 0 5304; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 5305; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[6:7], v[2:3] 5306; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s0 5307; GFX10-NEXT: v_cmp_gt_i64_e64 s0, s[2:3], 0 5308; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 5309; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3] 5310; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s0 5311; GFX10-NEXT: s_and_b32 s0, 1, s4 5312; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo 5313; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 5314; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc_lo 5315; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0 5316; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v7 5317; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 5318; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v1, 0 5319; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo 5320; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v1, vcc_lo 5321; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 5322; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0x80000000, v1, vcc_lo 5323; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, v2, s0 5324; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, v3, s0 5325; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, v8, s0 5326; GFX10-NEXT: v_cndmask_b32_e64 v3, v7, v9, s0 5327; GFX10-NEXT: ; return to shader part epilog 5328 %result = call i128 @llvm.ssub.sat.i128(i128 %lhs, i128 %rhs) 5329 %cast = 
bitcast i128 %result to <4 x float> 5330 ret <4 x float> %cast 5331} 5332 5333define <2 x i128> @v_ssubsat_v2i128(<2 x i128> %lhs, <2 x i128> %rhs) { 5334; GFX6-LABEL: v_ssubsat_v2i128: 5335; GFX6: ; %bb.0: 5336; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5337; GFX6-NEXT: v_sub_i32_e32 v16, vcc, v0, v8 5338; GFX6-NEXT: v_subb_u32_e32 v17, vcc, v1, v9, vcc 5339; GFX6-NEXT: v_subb_u32_e32 v18, vcc, v2, v10, vcc 5340; GFX6-NEXT: v_subb_u32_e32 v19, vcc, v3, v11, vcc 5341; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, v[16:17], v[0:1] 5342; GFX6-NEXT: v_bfrev_b32_e32 v20, 1 5343; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 5344; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[18:19], v[2:3] 5345; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 5346; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, v[18:19], v[2:3] 5347; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 5348; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[8:9] 5349; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 5350; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[10:11] 5351; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 5352; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] 5353; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 5354; GFX6-NEXT: v_xor_b32_e32 v0, v1, v0 5355; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v19 5356; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0, v1 5357; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc 5358; GFX6-NEXT: v_addc_u32_e32 v8, vcc, 0, v1, vcc 5359; GFX6-NEXT: v_addc_u32_e32 v9, vcc, v1, v20, vcc 5360; GFX6-NEXT: v_and_b32_e32 v0, 1, v0 5361; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 5362; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v2, vcc 5363; GFX6-NEXT: v_cndmask_b32_e32 v1, v17, v3, vcc 5364; GFX6-NEXT: v_cndmask_b32_e32 v2, v18, v8, vcc 5365; GFX6-NEXT: v_cndmask_b32_e32 v3, v19, v9, vcc 5366; GFX6-NEXT: v_sub_i32_e32 v8, vcc, v4, v12 5367; GFX6-NEXT: v_subb_u32_e32 v9, vcc, v5, v13, vcc 5368; GFX6-NEXT: v_subb_u32_e32 v10, vcc, v6, v14, vcc 5369; GFX6-NEXT: v_subb_u32_e32 v11, vcc, v7, v15, vcc 5370; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[4:5] 5371; 
GFX6-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 5372; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] 5373; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 5374; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] 5375; GFX6-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc 5376; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13] 5377; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 5378; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[14:15] 5379; GFX6-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 5380; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] 5381; GFX6-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc 5382; GFX6-NEXT: v_xor_b32_e32 v4, v5, v4 5383; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v11 5384; GFX6-NEXT: v_add_i32_e32 v6, vcc, 0, v5 5385; GFX6-NEXT: v_addc_u32_e32 v7, vcc, 0, v5, vcc 5386; GFX6-NEXT: v_addc_u32_e32 v12, vcc, 0, v5, vcc 5387; GFX6-NEXT: v_addc_u32_e32 v13, vcc, v5, v20, vcc 5388; GFX6-NEXT: v_and_b32_e32 v4, 1, v4 5389; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 5390; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v6, vcc 5391; GFX6-NEXT: v_cndmask_b32_e32 v5, v9, v7, vcc 5392; GFX6-NEXT: v_cndmask_b32_e32 v6, v10, v12, vcc 5393; GFX6-NEXT: v_cndmask_b32_e32 v7, v11, v13, vcc 5394; GFX6-NEXT: s_setpc_b64 s[30:31] 5395; 5396; GFX8-LABEL: v_ssubsat_v2i128: 5397; GFX8: ; %bb.0: 5398; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5399; GFX8-NEXT: v_sub_u32_e32 v16, vcc, v0, v8 5400; GFX8-NEXT: v_subb_u32_e32 v17, vcc, v1, v9, vcc 5401; GFX8-NEXT: v_subb_u32_e32 v18, vcc, v2, v10, vcc 5402; GFX8-NEXT: v_subb_u32_e32 v19, vcc, v3, v11, vcc 5403; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, v[16:17], v[0:1] 5404; GFX8-NEXT: v_bfrev_b32_e32 v20, 1 5405; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 5406; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[18:19], v[2:3] 5407; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 5408; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[18:19], v[2:3] 5409; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 5410; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[8:9] 5411; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 5412; GFX8-NEXT: 
v_cmp_lt_i64_e32 vcc, 0, v[10:11] 5413; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 5414; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] 5415; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 5416; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0 5417; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v19 5418; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0, v1 5419; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc 5420; GFX8-NEXT: v_addc_u32_e32 v8, vcc, 0, v1, vcc 5421; GFX8-NEXT: v_addc_u32_e32 v9, vcc, v1, v20, vcc 5422; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 5423; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 5424; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v2, vcc 5425; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v3, vcc 5426; GFX8-NEXT: v_cndmask_b32_e32 v2, v18, v8, vcc 5427; GFX8-NEXT: v_cndmask_b32_e32 v3, v19, v9, vcc 5428; GFX8-NEXT: v_sub_u32_e32 v8, vcc, v4, v12 5429; GFX8-NEXT: v_subb_u32_e32 v9, vcc, v5, v13, vcc 5430; GFX8-NEXT: v_subb_u32_e32 v10, vcc, v6, v14, vcc 5431; GFX8-NEXT: v_subb_u32_e32 v11, vcc, v7, v15, vcc 5432; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[4:5] 5433; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 5434; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] 5435; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 5436; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] 5437; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc 5438; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13] 5439; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 5440; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[14:15] 5441; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 5442; GFX8-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] 5443; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc 5444; GFX8-NEXT: v_xor_b32_e32 v4, v5, v4 5445; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v11 5446; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0, v5 5447; GFX8-NEXT: v_addc_u32_e32 v7, vcc, 0, v5, vcc 5448; GFX8-NEXT: v_addc_u32_e32 v12, vcc, 0, v5, vcc 5449; GFX8-NEXT: v_addc_u32_e32 v13, vcc, v5, v20, vcc 5450; GFX8-NEXT: v_and_b32_e32 v4, 1, v4 5451; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 5452; GFX8-NEXT: 
v_cndmask_b32_e32 v4, v8, v6, vcc 5453; GFX8-NEXT: v_cndmask_b32_e32 v5, v9, v7, vcc 5454; GFX8-NEXT: v_cndmask_b32_e32 v6, v10, v12, vcc 5455; GFX8-NEXT: v_cndmask_b32_e32 v7, v11, v13, vcc 5456; GFX8-NEXT: s_setpc_b64 s[30:31] 5457; 5458; GFX9-LABEL: v_ssubsat_v2i128: 5459; GFX9: ; %bb.0: 5460; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5461; GFX9-NEXT: v_sub_co_u32_e32 v16, vcc, v0, v8 5462; GFX9-NEXT: v_subb_co_u32_e32 v17, vcc, v1, v9, vcc 5463; GFX9-NEXT: v_subb_co_u32_e32 v18, vcc, v2, v10, vcc 5464; GFX9-NEXT: v_subb_co_u32_e32 v19, vcc, v3, v11, vcc 5465; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[16:17], v[0:1] 5466; GFX9-NEXT: v_bfrev_b32_e32 v20, 1 5467; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 5468; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[18:19], v[2:3] 5469; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 5470; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[18:19], v[2:3] 5471; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 5472; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[8:9] 5473; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 5474; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[10:11] 5475; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 5476; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] 5477; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 5478; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0 5479; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v19 5480; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 0, v1 5481; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc 5482; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc 5483; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, v1, v20, vcc 5484; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 5485; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 5486; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v2, vcc 5487; GFX9-NEXT: v_cndmask_b32_e32 v1, v17, v3, vcc 5488; GFX9-NEXT: v_cndmask_b32_e32 v2, v18, v8, vcc 5489; GFX9-NEXT: v_cndmask_b32_e32 v3, v19, v9, vcc 5490; GFX9-NEXT: v_sub_co_u32_e32 v8, vcc, v4, v12 5491; GFX9-NEXT: v_subb_co_u32_e32 v9, vcc, v5, v13, vcc 5492; GFX9-NEXT: v_subb_co_u32_e32 v10, vcc, v6, v14, vcc 
5493; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, v7, v15, vcc 5494; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[8:9], v[4:5] 5495; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 5496; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[10:11], v[6:7] 5497; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 5498; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, v[10:11], v[6:7] 5499; GFX9-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc 5500; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[12:13] 5501; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 5502; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[14:15] 5503; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 5504; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] 5505; GFX9-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc 5506; GFX9-NEXT: v_xor_b32_e32 v4, v5, v4 5507; GFX9-NEXT: v_ashrrev_i32_e32 v5, 31, v11 5508; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, 0, v5 5509; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v5, vcc 5510; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, 0, v5, vcc 5511; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, v5, v20, vcc 5512; GFX9-NEXT: v_and_b32_e32 v4, 1, v4 5513; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 5514; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v6, vcc 5515; GFX9-NEXT: v_cndmask_b32_e32 v5, v9, v7, vcc 5516; GFX9-NEXT: v_cndmask_b32_e32 v6, v10, v12, vcc 5517; GFX9-NEXT: v_cndmask_b32_e32 v7, v11, v13, vcc 5518; GFX9-NEXT: s_setpc_b64 s[30:31] 5519; 5520; GFX10-LABEL: v_ssubsat_v2i128: 5521; GFX10: ; %bb.0: 5522; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5523; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5524; GFX10-NEXT: v_sub_co_u32 v16, vcc_lo, v0, v8 5525; GFX10-NEXT: v_sub_co_ci_u32_e32 v17, vcc_lo, v1, v9, vcc_lo 5526; GFX10-NEXT: v_sub_co_ci_u32_e32 v18, vcc_lo, v2, v10, vcc_lo 5527; GFX10-NEXT: v_sub_co_ci_u32_e32 v19, vcc_lo, v3, v11, vcc_lo 5528; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[16:17], v[0:1] 5529; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 5530; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[18:19], v[2:3] 5531; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 5532; GFX10-NEXT: v_cmp_lt_u64_e32 
vcc_lo, 0, v[8:9] 5533; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo 5534; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, 0, v[10:11] 5535; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc_lo 5536; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[18:19], v[2:3] 5537; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo 5538; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, 0, v[10:11] 5539; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc_lo 5540; GFX10-NEXT: v_sub_co_u32 v8, vcc_lo, v4, v12 5541; GFX10-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, v5, v13, vcc_lo 5542; GFX10-NEXT: v_sub_co_ci_u32_e32 v10, vcc_lo, v6, v14, vcc_lo 5543; GFX10-NEXT: v_sub_co_ci_u32_e32 v11, vcc_lo, v7, v15, vcc_lo 5544; GFX10-NEXT: v_cmp_lt_u64_e64 s4, v[8:9], v[4:5] 5545; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0 5546; GFX10-NEXT: v_ashrrev_i32_e32 v1, 31, v19 5547; GFX10-NEXT: v_cmp_eq_u64_e64 s5, v[10:11], v[6:7] 5548; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 5549; GFX10-NEXT: v_cmp_lt_i64_e64 s4, v[10:11], v[6:7] 5550; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 5551; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v1, 0 5552; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo 5553; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 5554; GFX10-NEXT: v_cmp_lt_u64_e64 s4, 0, v[12:13] 5555; GFX10-NEXT: v_ashrrev_i32_e32 v7, 31, v11 5556; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s4 5557; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[14:15] 5558; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s4 5559; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, v0 5560; GFX10-NEXT: v_cndmask_b32_e64 v0, v5, v4, s5 5561; GFX10-NEXT: v_cmp_eq_u64_e64 s5, 0, v[14:15] 5562; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo 5563; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0x80000000, v1, vcc_lo 5564; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v3, s4 5565; GFX10-NEXT: v_cndmask_b32_e64 v4, v13, v12, s5 5566; GFX10-NEXT: v_xor_b32_e32 v4, v4, v0 5567; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v2, s4 5568; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v5, s4 5569; GFX10-NEXT: v_and_b32_e32 v3, 1, v4 5570; 
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v7, 0
; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v7, vcc_lo
; GFX10-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, 0, v7, vcc_lo
; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 0, v3
; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0x80000000, v7, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v6, s4
; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v4, s5
; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, v5, s5
; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v12, s5
; GFX10-NEXT: v_cndmask_b32_e64 v7, v11, v7, s5
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %lhs, <2 x i128> %rhs)
  ret <2 x i128> %result
}

; Signed saturating subtract of <2 x i128> in an amdgpu_ps function whose two
; operands are both marked inreg. The IR body is a single call to
; @llvm.ssub.sat.v2i128; the expected code reads the operands from s0-s15 and
; finishes with v_readfirstlane_b32 into s0-s7 on every checked target.
; The GFX6/GFX8/GFX9/GFX10 lines are autogenerated (see the NOTE on the first
; line of this file): regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs) {
; GFX6-LABEL: s_ssubsat_v2i128:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_sub_u32 s16, s0, s8
; GFX6-NEXT: s_cselect_b32 s17, 1, 0
; GFX6-NEXT: s_and_b32 s17, s17, 1
; GFX6-NEXT: s_cmp_lg_u32 s17, 0
; GFX6-NEXT: s_subb_u32 s17, s1, s9
; GFX6-NEXT: s_cselect_b32 s18, 1, 0
; GFX6-NEXT: s_and_b32 s18, s18, 1
; GFX6-NEXT: s_cmp_lg_u32 s18, 0
; GFX6-NEXT: s_subb_u32 s18, s2, s10
; GFX6-NEXT: s_cselect_b32 s19, 1, 0
; GFX6-NEXT: v_mov_b32_e32 v3, s1
; GFX6-NEXT: s_and_b32 s19, s19, 1
; GFX6-NEXT: v_mov_b32_e32 v2, s0
; GFX6-NEXT: s_cmp_lg_u32 s19, 0
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, s[16:17], v[2:3]
; GFX6-NEXT: s_subb_u32 s19, s3, s11
; GFX6-NEXT: v_mov_b32_e32 v1, s3
; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, s[18:19], v[0:1]
; GFX6-NEXT: v_cmp_gt_u64_e64 s[0:1], s[8:9], 0
; GFX6-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, s[18:19], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; GFX6-NEXT: v_cmp_gt_i64_e64 s[0:1], s[10:11], 0
; GFX6-NEXT: s_ashr_i32 s3, s19, 31
; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; GFX6-NEXT: s_add_u32 s0, s3, 0
; GFX6-NEXT: s_cselect_b32 s1, 1, 0
; GFX6-NEXT: s_and_b32 s1, s1, 1
; GFX6-NEXT: s_cmp_lg_u32 s1, 0
; GFX6-NEXT: s_addc_u32 s1, s3, 0
; GFX6-NEXT: s_cselect_b32 s2, 1, 0
; GFX6-NEXT: s_and_b32 s2, s2, 1
; GFX6-NEXT: s_cmp_lg_u32 s2, 0
; GFX6-NEXT: s_addc_u32 s2, s3, 0
; GFX6-NEXT: s_cselect_b32 s9, 1, 0
; GFX6-NEXT: v_cmp_eq_u64_e64 vcc, s[10:11], 0
; GFX6-NEXT: s_and_b32 s9, s9, 1
; GFX6-NEXT: s_brev_b32 s8, 1
; GFX6-NEXT: s_cmp_lg_u32 s9, 0
; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX6-NEXT: s_addc_u32 s3, s3, s8
; GFX6-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX6-NEXT: v_mov_b32_e32 v1, s0
; GFX6-NEXT: s_sub_u32 s0, s4, s12
; GFX6-NEXT: v_mov_b32_e32 v2, s1
; GFX6-NEXT: s_cselect_b32 s1, 1, 0
; GFX6-NEXT: s_and_b32 s1, s1, 1
; GFX6-NEXT: s_cmp_lg_u32 s1, 0
; GFX6-NEXT: v_and_b32_e32 v0, 1, v0
; GFX6-NEXT: s_subb_u32 s1, s5, s13
; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: s_cselect_b32 s2, 1, 0
; GFX6-NEXT: s_and_b32 s2, s2, 1
; GFX6-NEXT: v_mov_b32_e32 v3, s16
; GFX6-NEXT: v_mov_b32_e32 v4, s17
; GFX6-NEXT: s_cmp_lg_u32 s2, 0
; GFX6-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc
; GFX6-NEXT: v_mov_b32_e32 v1, s3
; GFX6-NEXT: v_mov_b32_e32 v2, s18
; GFX6-NEXT: v_mov_b32_e32 v3, s19
; GFX6-NEXT: s_subb_u32 s2, s6, s14
; GFX6-NEXT: v_cndmask_b32_e32 v6, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v7, v3, v1, vcc
; GFX6-NEXT: s_cselect_b32 s3, 1, 0
; GFX6-NEXT: v_mov_b32_e32 v2, s4
; GFX6-NEXT: s_and_b32 s3, s3, 1
; GFX6-NEXT: v_mov_b32_e32 v3, s5
; GFX6-NEXT: s_cmp_lg_u32 s3, 0
; GFX6-NEXT: v_mov_b32_e32 v0, s6
; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[2:3]
; GFX6-NEXT: s_subb_u32 s3, s7, s15
; GFX6-NEXT: v_mov_b32_e32 v1, s7
; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
; GFX6-NEXT: v_cmp_gt_u64_e64 s[4:5], s[12:13], 0
; GFX6-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, s[2:3], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GFX6-NEXT: v_cmp_gt_i64_e64 s[4:5], s[14:15], 0
; GFX6-NEXT: s_ashr_i32 s7, s3, 31
; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
; GFX6-NEXT: s_add_u32 s4, s7, 0
; GFX6-NEXT: s_cselect_b32 s5, 1, 0
; GFX6-NEXT: s_and_b32 s5, s5, 1
; GFX6-NEXT: s_cmp_lg_u32 s5, 0
; GFX6-NEXT: s_addc_u32 s5, s7, 0
; GFX6-NEXT: s_cselect_b32 s6, 1, 0
; GFX6-NEXT: s_and_b32 s6, s6, 1
; GFX6-NEXT: v_cmp_eq_u64_e64 vcc, s[14:15], 0
; GFX6-NEXT: s_cmp_lg_u32 s6, 0
; GFX6-NEXT: s_addc_u32 s6, s7, 0
; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX6-NEXT: s_cselect_b32 s9, 1, 0
; GFX6-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX6-NEXT: s_and_b32 s9, s9, 1
; GFX6-NEXT: s_cmp_lg_u32 s9, 0
; GFX6-NEXT: v_and_b32_e32 v0, 1, v0
; GFX6-NEXT: s_addc_u32 s7, s7, s8
; GFX6-NEXT: v_mov_b32_e32 v1, s4
; GFX6-NEXT: v_mov_b32_e32 v2, s5
; GFX6-NEXT: v_mov_b32_e32 v3, s0
; GFX6-NEXT: v_mov_b32_e32 v8, s1
; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc
; GFX6-NEXT: v_mov_b32_e32 v2, s6
; GFX6-NEXT: v_mov_b32_e32 v3, s7
; GFX6-NEXT: v_mov_b32_e32 v8, s2
; GFX6-NEXT: v_mov_b32_e32 v9, s3
; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
; GFX6-NEXT: v_readfirstlane_b32 s0, v5
; GFX6-NEXT: v_readfirstlane_b32 s1, v4
; GFX6-NEXT: v_readfirstlane_b32 s2, v6
; GFX6-NEXT: v_readfirstlane_b32 s3, v7
; GFX6-NEXT: v_readfirstlane_b32 s4, v0
; GFX6-NEXT: v_readfirstlane_b32 s5, v1
; GFX6-NEXT: v_readfirstlane_b32 s6, v2
; GFX6-NEXT: v_readfirstlane_b32 s7, v3
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_ssubsat_v2i128:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_sub_u32 s16, s0, s8
; GFX8-NEXT: s_cselect_b32 s17, 1, 0
; GFX8-NEXT: s_and_b32 s17, s17, 1
; GFX8-NEXT: s_cmp_lg_u32 s17, 0
; GFX8-NEXT: s_subb_u32 s17, s1, s9
; GFX8-NEXT: s_cselect_b32 s18, 1, 0
; GFX8-NEXT: s_and_b32 s18, s18, 1
; GFX8-NEXT: s_cmp_lg_u32 s18, 0
; GFX8-NEXT: s_subb_u32 s18, s2, s10
; GFX8-NEXT: s_cselect_b32 s19, 1, 0
; GFX8-NEXT: s_and_b32 s19, s19, 1
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: s_cmp_lg_u32 s19, 0
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: s_subb_u32 s19, s3, s11
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, s[16:17], v[2:3]
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: s_cmp_eq_u64 s[18:19], s[2:3]
; GFX8-NEXT: s_cselect_b32 s2, 1, 0
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[18:19], v[0:1]
; GFX8-NEXT: s_and_b32 s0, 1, s2
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
; GFX8-NEXT: v_cmp_gt_u64_e64 s[0:1], s[8:9], 0
; GFX8-NEXT: s_cmp_eq_u64 s[10:11], 0
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; GFX8-NEXT: v_cmp_gt_i64_e64 s[0:1], s[10:11], 0
; GFX8-NEXT: s_cselect_b32 s2, 1, 0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; GFX8-NEXT: s_and_b32 s0, 1, s2
; GFX8-NEXT: s_ashr_i32 s3, s19, 31
; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
; GFX8-NEXT: s_add_u32 s0, s3, 0
; GFX8-NEXT: s_cselect_b32 s1, 1, 0
; GFX8-NEXT: s_and_b32 s1, s1, 1
; GFX8-NEXT: s_cmp_lg_u32 s1, 0
; GFX8-NEXT: s_addc_u32 s1, s3, 0
; GFX8-NEXT: s_cselect_b32 s2, 1, 0
; GFX8-NEXT: s_and_b32 s2, s2, 1
; GFX8-NEXT: s_cmp_lg_u32 s2, 0
; GFX8-NEXT: s_addc_u32 s2, s3, 0
; GFX8-NEXT: s_cselect_b32 s9, 1, 0
; GFX8-NEXT: s_and_b32 s9, s9, 1
; GFX8-NEXT: s_brev_b32 s8, 1
; GFX8-NEXT: s_cmp_lg_u32 s9, 0
; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX8-NEXT: s_addc_u32 s3, s3, s8
; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX8-NEXT: v_mov_b32_e32 v1, s0
; GFX8-NEXT: s_sub_u32 s0, s4, s12
; GFX8-NEXT: v_mov_b32_e32 v2, s1
; GFX8-NEXT: s_cselect_b32 s1, 1, 0
; GFX8-NEXT: s_and_b32 s1, s1, 1
; GFX8-NEXT: s_cmp_lg_u32 s1, 0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
; GFX8-NEXT: s_subb_u32 s1, s5, s13
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: s_cselect_b32 s2, 1, 0
; GFX8-NEXT: s_and_b32 s2, s2, 1
; GFX8-NEXT: s_cmp_lg_u32 s2, 0
; GFX8-NEXT: v_mov_b32_e32 v3, s16
; GFX8-NEXT: v_mov_b32_e32 v4, s17
; GFX8-NEXT: s_subb_u32 s2, s6, s14
; GFX8-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_mov_b32_e32 v2, s18
; GFX8-NEXT: v_mov_b32_e32 v3, s19
; GFX8-NEXT: s_cselect_b32 s3, 1, 0
; GFX8-NEXT: v_cndmask_b32_e32 v6, v2, v0, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v7, v3, v1, vcc
; GFX8-NEXT: s_and_b32 s3, s3, 1
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: s_cmp_lg_u32 s3, 0
; GFX8-NEXT: v_mov_b32_e32 v3, s5
; GFX8-NEXT: s_subb_u32 s3, s7, s15
; GFX8-NEXT: v_mov_b32_e32 v0, s6
; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[2:3]
; GFX8-NEXT: v_mov_b32_e32 v1, s7
; GFX8-NEXT: s_cmp_eq_u64 s[2:3], s[6:7]
; GFX8-NEXT: s_cselect_b32 s6, 1, 0
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
; GFX8-NEXT: s_and_b32 s4, 1, s6
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
; GFX8-NEXT: v_cmp_gt_u64_e64 s[4:5], s[12:13], 0
; GFX8-NEXT: s_cmp_eq_u64 s[14:15], 0
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GFX8-NEXT: v_cmp_gt_i64_e64 s[4:5], s[14:15], 0
; GFX8-NEXT: s_cselect_b32 s6, 1, 0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
; GFX8-NEXT: s_and_b32 s4, 1, s6
; GFX8-NEXT: s_ashr_i32 s7, s3, 31
; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
; GFX8-NEXT: s_add_u32 s4, s7, 0
; GFX8-NEXT: s_cselect_b32 s5, 1, 0
; GFX8-NEXT: s_and_b32 s5, s5, 1
; GFX8-NEXT: s_cmp_lg_u32 s5, 0
; GFX8-NEXT: s_addc_u32 s5, s7, 0
; GFX8-NEXT: s_cselect_b32 s6, 1, 0
; GFX8-NEXT: s_and_b32 s6, s6, 1
; GFX8-NEXT: s_cmp_lg_u32 s6, 0
; GFX8-NEXT: s_addc_u32 s6, s7, 0
; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX8-NEXT: s_cselect_b32 s9, 1, 0
; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX8-NEXT: s_and_b32 s9, s9, 1
; GFX8-NEXT: s_cmp_lg_u32 s9, 0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
; GFX8-NEXT: s_addc_u32 s7, s7, s8
; GFX8-NEXT: v_mov_b32_e32 v1, s4
; GFX8-NEXT: v_mov_b32_e32 v2, s5
; GFX8-NEXT: v_mov_b32_e32 v3, s0
; GFX8-NEXT: v_mov_b32_e32 v8, s1
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc
; GFX8-NEXT: v_mov_b32_e32 v2, s6
; GFX8-NEXT: v_mov_b32_e32 v3, s7
; GFX8-NEXT: v_mov_b32_e32 v8, s2
; GFX8-NEXT: v_mov_b32_e32 v9, s3
; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
; GFX8-NEXT: v_readfirstlane_b32 s0, v5
; GFX8-NEXT: v_readfirstlane_b32 s1, v4
; GFX8-NEXT: v_readfirstlane_b32 s2, v6
; GFX8-NEXT: v_readfirstlane_b32 s3, v7
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: v_readfirstlane_b32 s5, v1
; GFX8-NEXT: v_readfirstlane_b32 s6, v2
; GFX8-NEXT: v_readfirstlane_b32 s7, v3
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_ssubsat_v2i128:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_sub_u32 s16, s0, s8
; GFX9-NEXT: s_cselect_b32 s17, 1, 0
; GFX9-NEXT: s_and_b32 s17, s17, 1
; GFX9-NEXT: s_cmp_lg_u32 s17, 0
; GFX9-NEXT: s_subb_u32 s17, s1, s9
; GFX9-NEXT: s_cselect_b32 s18, 1, 0
; GFX9-NEXT: s_and_b32 s18, s18, 1
; GFX9-NEXT: s_cmp_lg_u32 s18, 0
; GFX9-NEXT: s_subb_u32 s18, s2, s10
; GFX9-NEXT: s_cselect_b32 s19, 1, 0
; GFX9-NEXT: s_and_b32 s19, s19, 1
; GFX9-NEXT: v_mov_b32_e32 v3, s1
; GFX9-NEXT: s_cmp_lg_u32 s19, 0
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: s_subb_u32 s19, s3, s11
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[16:17], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: s_cmp_eq_u64 s[18:19], s[2:3]
; GFX9-NEXT: s_cselect_b32 s2, 1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[18:19], v[0:1]
; GFX9-NEXT: s_and_b32 s0, 1, s2
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
; GFX9-NEXT: v_cmp_gt_u64_e64 s[0:1], s[8:9], 0
; GFX9-NEXT: s_cmp_eq_u64 s[10:11], 0
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; GFX9-NEXT: v_cmp_gt_i64_e64 s[0:1], s[10:11], 0
; GFX9-NEXT: s_cselect_b32 s2, 1, 0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; GFX9-NEXT: s_and_b32 s0, 1, s2
; GFX9-NEXT: s_ashr_i32 s3, s19, 31
; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
; GFX9-NEXT: s_add_u32 s0, s3, 0
; GFX9-NEXT: s_cselect_b32 s1, 1, 0
; GFX9-NEXT: s_and_b32 s1, s1, 1
; GFX9-NEXT: s_cmp_lg_u32 s1, 0
; GFX9-NEXT: s_addc_u32 s1, s3, 0
; GFX9-NEXT: s_cselect_b32 s2, 1, 0
; GFX9-NEXT: s_and_b32 s2, s2, 1
; GFX9-NEXT: s_cmp_lg_u32 s2, 0
; GFX9-NEXT: s_addc_u32 s2, s3, 0
; GFX9-NEXT: s_cselect_b32 s9, 1, 0
; GFX9-NEXT: s_and_b32 s9, s9, 1
; GFX9-NEXT: s_brev_b32 s8, 1
; GFX9-NEXT: s_cmp_lg_u32 s9, 0
; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: s_addc_u32 s3, s3, s8
; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: s_sub_u32 s0, s4, s12
; GFX9-NEXT: v_mov_b32_e32 v2, s1
; GFX9-NEXT: s_cselect_b32 s1, 1, 0
; GFX9-NEXT: s_and_b32 s1, s1, 1
; GFX9-NEXT: s_cmp_lg_u32 s1, 0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: s_subb_u32 s1, s5, s13
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: s_cselect_b32 s2, 1, 0
; GFX9-NEXT: s_and_b32 s2, s2, 1
; GFX9-NEXT: s_cmp_lg_u32 s2, 0
; GFX9-NEXT: v_mov_b32_e32 v3, s16
; GFX9-NEXT: v_mov_b32_e32 v4, s17
; GFX9-NEXT: s_subb_u32 s2, s6, s14
; GFX9-NEXT: v_cndmask_b32_e32 v5, v3, v1, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: v_mov_b32_e32 v2, s18
; GFX9-NEXT: v_mov_b32_e32 v3, s19
; GFX9-NEXT: s_cselect_b32 s3, 1, 0
; GFX9-NEXT: v_cndmask_b32_e32 v6, v2, v0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v7, v3, v1, vcc
; GFX9-NEXT: s_and_b32 s3, s3, 1
; GFX9-NEXT: v_mov_b32_e32 v2, s4
; GFX9-NEXT: s_cmp_lg_u32 s3, 0
; GFX9-NEXT: v_mov_b32_e32 v3, s5
; GFX9-NEXT: s_subb_u32 s3, s7, s15
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[2:3]
; GFX9-NEXT: v_mov_b32_e32 v1, s7
; GFX9-NEXT: s_cmp_eq_u64 s[2:3], s[6:7]
; GFX9-NEXT: s_cselect_b32 s6, 1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
; GFX9-NEXT: s_and_b32 s4, 1, s6
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
; GFX9-NEXT: v_cmp_gt_u64_e64 s[4:5], s[12:13], 0
; GFX9-NEXT: s_cmp_eq_u64 s[14:15], 0
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GFX9-NEXT: v_cmp_gt_i64_e64 s[4:5], s[14:15], 0
; GFX9-NEXT: s_cselect_b32 s6, 1, 0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
; GFX9-NEXT: s_and_b32 s4, 1, s6
; GFX9-NEXT: s_ashr_i32 s7, s3, 31
; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
; GFX9-NEXT: s_add_u32 s4, s7, 0
; GFX9-NEXT: s_cselect_b32 s5, 1, 0
; GFX9-NEXT: s_and_b32 s5, s5, 1
; GFX9-NEXT: s_cmp_lg_u32 s5, 0
; GFX9-NEXT: s_addc_u32 s5, s7, 0
; GFX9-NEXT: s_cselect_b32 s6, 1, 0
; GFX9-NEXT: s_and_b32 s6, s6, 1
; GFX9-NEXT: s_cmp_lg_u32 s6, 0
; GFX9-NEXT: s_addc_u32 s6, s7, 0
; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX9-NEXT: s_cselect_b32 s9, 1, 0
; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX9-NEXT: s_and_b32 s9, s9, 1
; GFX9-NEXT: s_cmp_lg_u32 s9, 0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: s_addc_u32 s7, s7, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s4
; GFX9-NEXT: v_mov_b32_e32 v2, s5
; GFX9-NEXT: v_mov_b32_e32 v3, s0
; GFX9-NEXT: v_mov_b32_e32 v8, s1
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, v8, v2, vcc
; GFX9-NEXT: v_mov_b32_e32 v2, s6
; GFX9-NEXT: v_mov_b32_e32 v3, s7
; GFX9-NEXT: v_mov_b32_e32 v8, s2
; GFX9-NEXT: v_mov_b32_e32 v9, s3
; GFX9-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
; GFX9-NEXT: v_readfirstlane_b32 s0, v5
; GFX9-NEXT: v_readfirstlane_b32 s1, v4
; GFX9-NEXT: v_readfirstlane_b32 s2, v6
; GFX9-NEXT: v_readfirstlane_b32 s3, v7
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: v_readfirstlane_b32 s5, v1
; GFX9-NEXT: v_readfirstlane_b32 s6, v2
; GFX9-NEXT: v_readfirstlane_b32 s7, v3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_ssubsat_v2i128:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_sub_u32 s16, s0, s8
; GFX10-NEXT: s_cselect_b32 s17, 1, 0
; GFX10-NEXT: s_and_b32 s17, s17, 1
; GFX10-NEXT: s_cmp_lg_u32 s17, 0
; GFX10-NEXT: s_subb_u32 s17, s1, s9
; GFX10-NEXT: s_cselect_b32 s18, 1, 0
; GFX10-NEXT: v_cmp_lt_u64_e64 s0, s[16:17], s[0:1]
; GFX10-NEXT: s_and_b32 s18, s18, 1
; GFX10-NEXT: v_cmp_gt_u64_e64 s1, s[8:9], 0
; GFX10-NEXT: s_cmp_lg_u32 s18, 0
; GFX10-NEXT: s_subb_u32 s18, s2, s10
; GFX10-NEXT: s_cselect_b32 s19, 1, 0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX10-NEXT: s_and_b32 s19, s19, 1
; GFX10-NEXT: s_cmp_lg_u32 s19, 0
; GFX10-NEXT: s_subb_u32 s19, s3, s11
; GFX10-NEXT: v_cmp_lt_i64_e64 s0, s[18:19], s[2:3]
; GFX10-NEXT: s_cmp_eq_u64 s[18:19], s[2:3]
; GFX10-NEXT: v_mov_b32_e32 v3, s19
; GFX10-NEXT: s_cselect_b32 s20, 1, 0
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
; GFX10-NEXT: s_and_b32 s0, 1, s20
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
; GFX10-NEXT: s_cselect_b32 s0, 1, 0
; GFX10-NEXT: s_ashr_i32 s3, s19, 31
; GFX10-NEXT: s_and_b32 s0, 1, s0
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s1
; GFX10-NEXT: v_cmp_gt_i64_e64 s1, s[10:11], 0
; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
; GFX10-NEXT: s_add_u32 s0, s3, 0
; GFX10-NEXT: s_brev_b32 s10, 1
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s1
; GFX10-NEXT: s_cselect_b32 s1, 1, 0
; GFX10-NEXT: s_and_b32 s1, s1, 1
; GFX10-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
; GFX10-NEXT: s_cmp_lg_u32 s1, 0
; GFX10-NEXT: v_mov_b32_e32 v2, s17
; GFX10-NEXT: s_addc_u32 s1, s3, 0
; GFX10-NEXT: s_cselect_b32 s2, 1, 0
; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX10-NEXT: s_and_b32 s2, s2, 1
; GFX10-NEXT: v_mov_b32_e32 v1, s16
; GFX10-NEXT: s_cmp_lg_u32 s2, 0
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
; GFX10-NEXT: s_addc_u32 s2, s3, 0
; GFX10-NEXT: s_cselect_b32 s8, 1, 0
; GFX10-NEXT: s_and_b32 s8, s8, 1
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX10-NEXT: s_cmp_lg_u32 s8, 0
; GFX10-NEXT: s_addc_u32 s3, s3, s10
; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
; GFX10-NEXT: s_sub_u32 s0, s4, s12
; GFX10-NEXT: s_cselect_b32 s8, 1, 0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, s1, vcc_lo
; GFX10-NEXT: s_and_b32 s8, s8, 1
; GFX10-NEXT: v_mov_b32_e32 v2, s18
; GFX10-NEXT: s_cmp_lg_u32 s8, 0
; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s3, vcc_lo
; GFX10-NEXT: s_subb_u32 s1, s5, s13
; GFX10-NEXT: s_cselect_b32 s8, 1, 0
; GFX10-NEXT: v_cmp_lt_u64_e64 s4, s[0:1], s[4:5]
; GFX10-NEXT: s_and_b32 s8, s8, 1
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s2, vcc_lo
; GFX10-NEXT: s_cmp_lg_u32 s8, 0
; GFX10-NEXT: v_cmp_gt_u64_e64 s3, s[12:13], 0
; GFX10-NEXT: s_subb_u32 s8, s6, s14
; GFX10-NEXT: s_cselect_b32 s9, 1, 0
; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4
; GFX10-NEXT: s_and_b32 s9, s9, 1
; GFX10-NEXT: v_mov_b32_e32 v7, s8
; GFX10-NEXT: s_cmp_lg_u32 s9, 0
; GFX10-NEXT: s_subb_u32 s9, s7, s15
; GFX10-NEXT: s_cmp_eq_u64 s[8:9], s[6:7]
; GFX10-NEXT: v_cmp_lt_i64_e64 s4, s[8:9], s[6:7]
; GFX10-NEXT: s_cselect_b32 s2, 1, 0
; GFX10-NEXT: v_mov_b32_e32 v8, s9
; GFX10-NEXT: s_and_b32 s2, 1, s2
; GFX10-NEXT: s_cmp_eq_u64 s[14:15], 0
; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4
; GFX10-NEXT: s_cselect_b32 s2, 1, 0
; GFX10-NEXT: s_ashr_i32 s5, s9, 31
; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s3
; GFX10-NEXT: v_cmp_gt_i64_e64 s3, s[14:15], 0
; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s3
; GFX10-NEXT: s_and_b32 s3, 1, s2
; GFX10-NEXT: s_add_u32 s2, s5, 0
; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s3
; GFX10-NEXT: s_cselect_b32 s4, 1, 0
; GFX10-NEXT: s_and_b32 s4, s4, 1
; GFX10-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc_lo
; GFX10-NEXT: s_cmp_lg_u32 s4, 0
; GFX10-NEXT: v_mov_b32_e32 v6, s1
; GFX10-NEXT: s_addc_u32 s3, s5, 0
; GFX10-NEXT: s_cselect_b32 s4, 1, 0
; GFX10-NEXT: v_xor_b32_e32 v4, v5, v4
; GFX10-NEXT: s_and_b32 s4, s4, 1
; GFX10-NEXT: v_mov_b32_e32 v5, s0
; GFX10-NEXT: s_cmp_lg_u32 s4, 0
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: v_and_b32_e32 v4, 1, v4
; GFX10-NEXT: s_addc_u32 s4, s5, 0
; GFX10-NEXT: s_cselect_b32 s6, 1, 0
; GFX10-NEXT: s_and_b32 s6, s6, 1
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: s_cmp_lg_u32 s6, 0
; GFX10-NEXT: s_addc_u32 s1, s5, s10
; GFX10-NEXT: v_cndmask_b32_e64 v4, v5, s2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v5, v6, s3, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v6, v7, s4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v7, v8, s1, vcc_lo
; GFX10-NEXT: v_readfirstlane_b32 s1, v1
; GFX10-NEXT: v_readfirstlane_b32 s2, v2
; GFX10-NEXT: v_readfirstlane_b32 s3, v3
; GFX10-NEXT: v_readfirstlane_b32 s4, v4
; GFX10-NEXT: v_readfirstlane_b32 s5, v5
; GFX10-NEXT: v_readfirstlane_b32 s6, v6
; GFX10-NEXT: v_readfirstlane_b32 s7, v7
; GFX10-NEXT: ; return to shader part epilog
  %result = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %lhs, <2 x i128> %rhs)
  ret <2 x i128> %result
}

; Declarations of the llvm.ssub.sat overloads, listed in order of increasing
; element width (i7/i8, i16, i24, i32, i48, i64, i128). Each one carries
; attribute group #0.
declare i7 @llvm.ssub.sat.i7(i7, i7) #0
declare i8 @llvm.ssub.sat.i8(i8, i8) #0
declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>) #0
declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>) #0

declare i16 @llvm.ssub.sat.i16(i16, i16) #0
declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) #0
declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) #0
declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) #0
declare <5 x i16> @llvm.ssub.sat.v5i16(<5 x i16>, <5 x i16>) #0
declare <6 x i16> @llvm.ssub.sat.v6i16(<6 x i16>, <6 x i16>) #0
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) #0

declare i24 @llvm.ssub.sat.i24(i24, i24) #0

declare i32 @llvm.ssub.sat.i32(i32, i32) #0
declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) #0
declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) #0
declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) #0
declare <5 x i32> @llvm.ssub.sat.v5i32(<5 x i32>, <5 x i32>) #0
declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) #0

declare i48 @llvm.ssub.sat.i48(i48, i48) #0

declare i64 @llvm.ssub.sat.i64(i64, i64) #0
declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>) #0

declare i128 @llvm.ssub.sat.i128(i128, i128) #0
declare <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128>, <2 x i128>) #0

; Attribute group referenced as #0 by every intrinsic declaration above.
attributes #0 = { nounwind readnone speculatable willreturn }