; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s

; Tests for extractelement on vectors of i64, with both constant and
; dynamic indices. The first RUN line uses the default subtarget (SI
; prefix); the second targets tonga (VI prefix).

; Regression test: replacing i64 stores with v2i32 stores used to break
; other users of the bitcast if they already existed. Both 32-bit halves
; and the original 64-bit value must still be stored.

; GCN-LABEL: {{^}}extract_vector_elt_select_error:
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @extract_vector_elt_select_error(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %val) #0 {
  %vec = bitcast i64 %val to <2 x i32>
  %elt0 = extractelement <2 x i32> %vec, i32 0
  %elt1 = extractelement <2 x i32> %vec, i32 1

  store volatile i32 %elt0, i32 addrspace(1)* %out
  store volatile i32 %elt1, i32 addrspace(1)* %out
  store volatile i64 %val, i64 addrspace(1)* %in
  ret void
}

; Constant-index extracts of both elements of a <2 x i64> kernel argument.
; Each element is written with its own volatile 64-bit store, so two
; 64-bit stores are expected.

; GCN-LABEL: {{^}}extract_vector_elt_v2i64:
; GCN: buffer_store_dwordx2
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo) #0 {
  %p0 = extractelement <2 x i64> %foo, i32 0
  %p1 = extractelement <2 x i64> %foo, i32 1
  %out1 = getelementptr i64, i64 addrspace(1)* %out, i32 1
  store volatile i64 %p1, i64 addrspace(1)* %out
  store volatile i64 %p0, i64 addrspace(1)* %out1
  ret void
}

; Dynamic-index extract from a <2 x i64> kernel argument. No buffer load
; may appear; the element is chosen by comparing the index against 1.
; The SI checks expect a pair of v_cndmask selects sharing one condition,
; while the VI checks expect a single 64-bit s_cselect whose result is
; copied to VGPRs for the store.

; GCN-LABEL: {{^}}dyn_extract_vector_elt_v2i64:
; GCN-NOT: buffer_load
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; SI-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; SI: store_dwordx2 v[{{[0-9:]+}}]
; VI: s_cselect_b64 s{{\[}}[[S_LO:[0-9]+]]:[[S_HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]]
; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[S_HI]]
; VI: store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define amdgpu_kernel void @dyn_extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo, i32 %elt) #0 {
  %dynelt = extractelement <2 x i64> %foo, i32 %elt
  store volatile i64 %dynelt, i64 addrspace(1)* %out
  ret void
}

; Dynamic-index extract from a <2 x i64> that is loaded from global memory
; and then OR'd with a kernel argument. Exactly one 128-bit load is
; expected, followed by the same compare/select sequence on both
; subtargets.

; GCN-LABEL: {{^}}dyn_extract_vector_elt_v2i64_2:
; GCN: buffer_load_dwordx4
; GCN-NOT: buffer_load
; GCN-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; GCN-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; GCN: store_dwordx2 v[{{[0-9:]+}}]
define amdgpu_kernel void @dyn_extract_vector_elt_v2i64_2(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %foo, i32 %elt, <2 x i64> %arst) #0 {
  %load = load volatile <2 x i64>, <2 x i64> addrspace(1)* %foo
  %or = or <2 x i64> %load, %arst
  %dynelt = extractelement <2 x i64> %or, i32 %elt
  store volatile i64 %dynelt, i64 addrspace(1)* %out
  ret void
}

; Dynamic-index extract from a <3 x i64> kernel argument. The index is
; compared against 1 and 2. The SI checks expect two v_cndmask selects per
; condition; the VI checks expect a chain of 64-bit s_cselect instructions
; in which each select takes the previous result as its last operand.

; GCN-LABEL: {{^}}dyn_extract_vector_elt_v3i64:
; SI-NOT: buffer_load
; SI-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; SI-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; SI-DAG: s_cmp_eq_u32 [[IDX]], 2
; SI-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]]
; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]]
; SI: store_dwordx2 v[{{[0-9:]+}}]
; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; VI: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 2
; VI: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T0LO]]:[[T0HI]]{{\]}}
; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T1LO]]
; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T1HI]]
; VI: store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define amdgpu_kernel void @dyn_extract_vector_elt_v3i64(i64 addrspace(1)* %out, <3 x i64> %foo, i32 %elt) #0 {
  %dynelt = extractelement <3 x i64> %foo, i32 %elt
  store volatile i64 %dynelt, i64 addrspace(1)* %out
  ret void
}

; Dynamic-index extract from a <4 x i64> kernel argument. Same shape as
; the <3 x i64> case with one more step: the index is compared against
; 1, 2 and 3, and no buffer load may appear on either subtarget.

; GCN-LABEL: {{^}}dyn_extract_vector_elt_v4i64:
; GCN-NOT: buffer_load
; SI-DAG: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; SI-DAG: s_cselect_b64 [[C1:[^,]+]], -1, 0
; SI-DAG: s_cmp_eq_u32 [[IDX]], 2
; SI-DAG: s_cselect_b64 [[C2:[^,]+]], -1, 0
; SI-DAG: s_cmp_eq_u32 [[IDX]], 3
; SI-DAG: s_cselect_b64 [[C3:[^,]+]], -1, 0
; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]]
; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C1]]
; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C2]]
; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]]
; SI-DAG: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[C3]]
; SI: store_dwordx2 v[{{[0-9:]+}}]
; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1
; VI: s_cselect_b64 s{{\[}}[[T0LO:[0-9]+]]:[[T0HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 2
; VI: s_cselect_b64 s{{\[}}[[T1LO:[0-9]+]]:[[T1HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T0LO]]:[[T0HI]]{{\]}}
; VI: s_cmp_eq_u32 [[IDX:s[0-9]+]], 3
; VI: s_cselect_b64 s{{\[}}[[T2LO:[0-9]+]]:[[T2HI:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], s{{\[}}[[T1LO]]:[[T1HI]]{{\]}}
; VI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[T2LO]]
; VI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[T2HI]]
; VI: store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define amdgpu_kernel void @dyn_extract_vector_elt_v4i64(i64 addrspace(1)* %out, <4 x i64> %foo, i32 %elt) #0 {
  %dynelt = extractelement <4 x i64> %foo, i32 %elt
  store volatile i64 %dynelt, i64 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }