; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=fiji -verify-machineinstrs | FileCheck -check-prefix=VI %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GFX9 %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s

; ===================================================================================
; V_ADD_LSHL_U32
; ===================================================================================
;
; Every test below computes (a + b) << c on i32 and returns the bits as a float
; from an amdgpu_ps shader. The tests differ only in which operands are plain
; arguments, inreg arguments, or immediates.

; Baseline: all three operands are plain (non-inreg) arguments. The expected
; output is a separate add and shift on fiji and one v_add_lshl_u32 on gfx900
; and gfx1010.
define amdgpu_ps float @add_shl(i32 %a, i32 %b, i32 %c) {
; VI-LABEL: add_shl:
; VI: ; %bb.0:
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; VI-NEXT: v_lshlrev_b32_e32 v0, v2, v0
; VI-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: add_shl:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_add_lshl_u32 v0, v0, v1, v2
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: add_shl:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_lshl_u32 v0, v0, v1, v2
; GFX10-NEXT: ; return to shader part epilog
  %sum = add i32 %a, %b
  %shifted = shl i32 %sum, %c
  %as_float = bitcast i32 %shifted to float
  ret float %as_float
}

; Both add operands are inreg (s2 and s3 in the expected output); only the
; shift amount is a plain argument. Per the checks, fiji and gfx900 keep a
; scalar s_add_i32 followed by a vector shift, while gfx1010 folds both into a
; single v_add_lshl_u32.
define amdgpu_ps float @add_shl_vgpr_c(i32 inreg %a, i32 inreg %b, i32 %c) {
; VI-LABEL: add_shl_vgpr_c:
; VI: ; %bb.0:
; VI-NEXT: s_add_i32 s2, s2, s3
; VI-NEXT: v_lshlrev_b32_e64 v0, v0, s2
; VI-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: add_shl_vgpr_c:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s2, s2, s3
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s2
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: add_shl_vgpr_c:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_lshl_u32 v0, s2, s3, v0
; GFX10-NEXT: ; return to shader part epilog
  %sum = add i32 %a, %b
  %shifted = shl i32 %sum, %c
  %as_float = bitcast i32 %shifted to float
  ret float %as_float
}

; Only %b is inreg (s2 in the expected output). gfx900 and gfx1010 are expected
; to select v_add_lshl_u32 with the scalar as the second add operand.
define amdgpu_ps float @add_shl_vgpr_ac(i32 %a, i32 inreg %b, i32 %c) {
; VI-LABEL: add_shl_vgpr_ac:
; VI: ; %bb.0:
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: add_shl_vgpr_ac:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_add_lshl_u32 v0, v0, s2, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: add_shl_vgpr_ac:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_lshl_u32 v0, v0, s2, v1
; GFX10-NEXT: ; return to shader part epilog
  %sum = add i32 %a, %b
  %shifted = shl i32 %sum, %c
  %as_float = bitcast i32 %shifted to float
  ret float %as_float
}

; The shift amount is the immediate 9, which the expected output encodes
; directly as the last operand of the shift / fused instruction.
define amdgpu_ps float @add_shl_vgpr_const(i32 %a, i32 %b) {
; VI-LABEL: add_shl_vgpr_const:
; VI: ; %bb.0:
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
; VI-NEXT: v_lshlrev_b32_e32 v0, 9, v0
; VI-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: add_shl_vgpr_const:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_add_lshl_u32 v0, v0, v1, 9
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: add_shl_vgpr_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_lshl_u32 v0, v0, v1, 9
; GFX10-NEXT: ; return to shader part epilog
  %sum = add i32 %a, %b
  %shifted = shl i32 %sum, 9
  %as_float = bitcast i32 %shifted to float
  ret float %as_float
}

; Adds the constant 1012 and shifts by the immediate 9. The expected output on
; every target shifts first and then adds 0x7e800 (1012 << 9), so the fused
; form on gfx900 and gfx1010 is v_lshl_add_u32 rather than v_add_lshl_u32.
define amdgpu_ps float @add_shl_vgpr_const_inline_const(i32 %a) {
; VI-LABEL: add_shl_vgpr_const_inline_const:
; VI: ; %bb.0:
; VI-NEXT: v_lshlrev_b32_e32 v0, 9, v0
; VI-NEXT: v_add_u32_e32 v0, vcc, 0x7e800, v0
; VI-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: add_shl_vgpr_const_inline_const:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, 0x7e800
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 9, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: add_shl_vgpr_const_inline_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 9, 0x7e800
; GFX10-NEXT: ; return to shader part epilog
  %sum = add i32 %a, 1012
  %shifted = shl i32 %sum, 9
  %as_float = bitcast i32 %shifted to float
  ret float %as_float
}

; TODO: Non-optimal code generation because SelectionDAG combines
; (shl (add x, CONST), y) ---> (add (shl x, y), CONST').
; Here the addend 3 becomes 0x600 (3 << 9) after the combine, and the gfx900
; checks need an extra v_mov_b32 to materialise it.
;
define amdgpu_ps float @add_shl_vgpr_inline_const_x2(i32 %a) {
; VI-LABEL: add_shl_vgpr_inline_const_x2:
; VI: ; %bb.0:
; VI-NEXT: v_lshlrev_b32_e32 v0, 9, v0
; VI-NEXT: v_add_u32_e32 v0, vcc, 0x600, v0
; VI-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: add_shl_vgpr_inline_const_x2:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v1, 0x600
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 9, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: add_shl_vgpr_inline_const_x2:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 9, 0x600
; GFX10-NEXT: ; return to shader part epilog
  %sum = add i32 %a, 3
  %shifted = shl i32 %sum, 9
  %as_float = bitcast i32 %shifted to float
  ret float %as_float
}