; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIVI,HAWAII %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIVI,FIJI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s

; Codegen test for stores of odd-width integers (i56, i55, i48, i65, i13 and
; i17) through addrspace(3) pointers. Each function below performs a single
; `store ... align 8`, and the check lines pin the exact instruction sequence
; llc emits for it on every -mcpu listed in the RUN lines above.
;
; Check prefixes, as wired up by the RUN lines
;   CIVI   - shared by the hawaii and fiji runs
;   HAWAII - hawaii run only
;   FIJI   - fiji run only
;   GFX9   - gfx900 run
;   GFX10  - gfx1010 run
;
; The check lines are generated output. After a codegen change, regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.

; i56, non-kernel function. The expected output writes the value in three
; pieces, a b32 at offset 0, a b16 at offset 4 and one byte at offset 6. On
; hawaii/fiji the byte is produced by shifting v2 right by 16; on gfx900 and
; gfx1010 it is written straight from v2 with ds_write_b8_d16_hi.
define void @local_store_i56(i56 addrspace(3)* %ptr, i56 %arg) #0 {
; CIVI-LABEL: local_store_i56:
; CIVI:       ; %bb.0:
; CIVI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIVI-NEXT:    s_mov_b32 m0, -1
; CIVI-NEXT:    ds_write_b16 v0, v2 offset:4
; CIVI-NEXT:    ds_write_b32 v0, v1
; CIVI-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
; CIVI-NEXT:    ds_write_b8 v0, v1 offset:6
; CIVI-NEXT:    s_waitcnt lgkmcnt(0)
; CIVI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: local_store_i56:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_write_b8_d16_hi v0, v2 offset:6
; GFX9-NEXT:    ds_write_b16 v0, v2 offset:4
; GFX9-NEXT:    ds_write_b32 v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: local_store_i56:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ds_write_b8_d16_hi v0, v2 offset:6
; GFX10-NEXT:    ds_write_b16 v0, v2 offset:4
; GFX10-NEXT:    ds_write_b32 v0, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  store i56 %arg, i56 addrspace(3)* %ptr, align 8
  ret void
}

; i55, amdgpu_kernel. Same three-piece layout as the i56 case (b32 at offset
; 0, b16 at offset 4, one byte at offset 6), but the byte written at offset 6
; is first reduced to 7 bits. The expected output does that with a 0x7f mask
; on hawaii, a 7-bit v_bfe_u32 on fiji, and a 0x7fffff mask ahead of
; ds_write_b8_d16_hi on gfx900/gfx1010. The arguments are fetched with loads
; based on s[4:5].
define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0 {
; HAWAII-LABEL: local_store_i55:
; HAWAII:       ; %bb.0:
; HAWAII-NEXT:    s_or_b32 s0, s4, 14
; HAWAII-NEXT:    v_mov_b32_e32 v0, s0
; HAWAII-NEXT:    v_mov_b32_e32 v1, s5
; HAWAII-NEXT:    flat_load_ubyte v0, v[0:1]
; HAWAII-NEXT:    s_load_dword s2, s[4:5], 0x3
; HAWAII-NEXT:    s_load_dword s0, s[4:5], 0x0
; HAWAII-NEXT:    s_load_dword s1, s[4:5], 0x2
; HAWAII-NEXT:    s_mov_b32 m0, -1
; HAWAII-NEXT:    s_waitcnt lgkmcnt(0)
; HAWAII-NEXT:    v_mov_b32_e32 v1, s0
; HAWAII-NEXT:    v_mov_b32_e32 v3, s2
; HAWAII-NEXT:    v_mov_b32_e32 v2, s1
; HAWAII-NEXT:    ds_write_b16 v1, v3 offset:4
; HAWAII-NEXT:    s_waitcnt vmcnt(0)
; HAWAII-NEXT:    v_and_b32_e32 v0, 0x7f, v0
; HAWAII-NEXT:    ds_write_b8 v1, v0 offset:6
; HAWAII-NEXT:    ds_write_b32 v1, v2
; HAWAII-NEXT:    s_endpgm
;
; FIJI-LABEL: local_store_i55:
; FIJI:       ; %bb.0:
; FIJI-NEXT:    s_or_b32 s0, s4, 14
; FIJI-NEXT:    v_mov_b32_e32 v0, s0
; FIJI-NEXT:    v_mov_b32_e32 v1, s5
; FIJI-NEXT:    flat_load_ubyte v0, v[0:1]
; FIJI-NEXT:    s_load_dword s0, s[4:5], 0x0
; FIJI-NEXT:    s_load_dword s1, s[4:5], 0x8
; FIJI-NEXT:    s_load_dword s2, s[4:5], 0xc
; FIJI-NEXT:    s_mov_b32 m0, -1
; FIJI-NEXT:    s_waitcnt lgkmcnt(0)
; FIJI-NEXT:    v_mov_b32_e32 v1, s0
; FIJI-NEXT:    v_mov_b32_e32 v3, s1
; FIJI-NEXT:    s_and_b32 s3, s2, 0xffff
; FIJI-NEXT:    v_mov_b32_e32 v2, s2
; FIJI-NEXT:    ds_write_b16 v1, v2 offset:4
; FIJI-NEXT:    s_waitcnt vmcnt(0)
; FIJI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; FIJI-NEXT:    v_or_b32_e32 v0, s3, v0
; FIJI-NEXT:    v_bfe_u32 v0, v0, 16, 7
; FIJI-NEXT:    ds_write_b8 v1, v0 offset:6
; FIJI-NEXT:    ds_write_b32 v1, v3
; FIJI-NEXT:    s_endpgm
;
; GFX9-LABEL: local_store_i55:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_load_ubyte_d16_hi v0, v0, s[4:5] offset:14
; GFX9-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x8
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_and_b32 s3, s2, 0xffff
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    v_mov_b32_e32 v2, s2
; GFX9-NEXT:    v_mov_b32_e32 v3, s1
; GFX9-NEXT:    ds_write_b16 v1, v2 offset:4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_or_b32_e32 v0, s3, v0
; GFX9-NEXT:    v_and_b32_e32 v0, 0x7fffff, v0
; GFX9-NEXT:    ds_write_b8_d16_hi v1, v0 offset:6
; GFX9-NEXT:    ds_write_b32 v1, v3
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: local_store_i55:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    s_load_dword s0, s[4:5], 0xc
; GFX10-NEXT:    s_load_dword s1, s[4:5], 0x0
; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX10-NEXT:    global_load_ubyte_d16_hi v0, v0, s[4:5] offset:14
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_and_b32 s3, s0, 0xffff
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    v_mov_b32_e32 v2, s0
; GFX10-NEXT:    v_mov_b32_e32 v3, s2
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_or_b32_e32 v0, s3, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0x7fffff, v0
; GFX10-NEXT:    ds_write_b16 v1, v2 offset:4
; GFX10-NEXT:    ds_write_b8_d16_hi v1, v0 offset:6
; GFX10-NEXT:    ds_write_b32 v1, v3
; GFX10-NEXT:    s_endpgm
  store i55 %arg, i55 addrspace(3)* %ptr, align 8
  ret void
}

; i48, amdgpu_kernel. The expected output needs only two writes, a b32 at
; offset 0 and a b16 at offset 4, with no masking instruction on any target.
define amdgpu_kernel void @local_store_i48(i48 addrspace(3)* %ptr, i48 %arg) #0 {
; HAWAII-LABEL: local_store_i48:
; HAWAII:       ; %bb.0:
; HAWAII-NEXT:    s_load_dword s0, s[4:5], 0x0
; HAWAII-NEXT:    s_load_dword s1, s[4:5], 0x2
; HAWAII-NEXT:    s_load_dword s2, s[4:5], 0x3
; HAWAII-NEXT:    s_mov_b32 m0, -1
; HAWAII-NEXT:    s_waitcnt lgkmcnt(0)
; HAWAII-NEXT:    v_mov_b32_e32 v0, s0
; HAWAII-NEXT:    v_mov_b32_e32 v1, s2
; HAWAII-NEXT:    ds_write_b16 v0, v1 offset:4
; HAWAII-NEXT:    v_mov_b32_e32 v1, s1
; HAWAII-NEXT:    ds_write_b32 v0, v1
; HAWAII-NEXT:    s_endpgm
;
; FIJI-LABEL: local_store_i48:
; FIJI:       ; %bb.0:
; FIJI-NEXT:    s_load_dword s0, s[4:5], 0x0
; FIJI-NEXT:    s_load_dword s1, s[4:5], 0x8
; FIJI-NEXT:    s_load_dword s2, s[4:5], 0xc
; FIJI-NEXT:    s_mov_b32 m0, -1
; FIJI-NEXT:    s_waitcnt lgkmcnt(0)
; FIJI-NEXT:    v_mov_b32_e32 v0, s0
; FIJI-NEXT:    v_mov_b32_e32 v1, s2
; FIJI-NEXT:    ds_write_b16 v0, v1 offset:4
; FIJI-NEXT:    v_mov_b32_e32 v1, s1
; FIJI-NEXT:    ds_write_b32 v0, v1
; FIJI-NEXT:    s_endpgm
;
; GFX9-LABEL: local_store_i48:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x8
; GFX9-NEXT:    s_load_dword s2, s[4:5], 0xc
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    ds_write_b16 v0, v1 offset:4
; GFX9-NEXT:    ds_write_b32 v0, v2
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: local_store_i48:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX10-NEXT:    s_load_dword s1, s[4:5], 0xc
; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    v_mov_b32_e32 v2, s2
; GFX10-NEXT:    ds_write_b16 v0, v1 offset:4
; GFX10-NEXT:    ds_write_b32 v0, v2
; GFX10-NEXT:    s_endpgm
  store i48 %arg, i48 addrspace(3)* %ptr, align 8
  ret void
}

; i65, amdgpu_kernel. The expected output writes the low 64 bits with one
; ds_write_b64 at offset 0 and the remaining bit as a single byte at offset 8,
; after masking that dword with 1 (s_and_b32 ..., 1).
define amdgpu_kernel void @local_store_i65(i65 addrspace(3)* %ptr, i65 %arg) #0 {
; HAWAII-LABEL: local_store_i65:
; HAWAII:       ; %bb.0:
; HAWAII-NEXT:    s_load_dword s2, s[4:5], 0x0
; HAWAII-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
; HAWAII-NEXT:    s_load_dword s3, s[4:5], 0x4
; HAWAII-NEXT:    s_mov_b32 m0, -1
; HAWAII-NEXT:    s_waitcnt lgkmcnt(0)
; HAWAII-NEXT:    v_mov_b32_e32 v2, s2
; HAWAII-NEXT:    s_and_b32 s3, s3, 1
; HAWAII-NEXT:    v_mov_b32_e32 v0, s3
; HAWAII-NEXT:    ds_write_b8 v2, v0 offset:8
; HAWAII-NEXT:    v_mov_b32_e32 v0, s0
; HAWAII-NEXT:    v_mov_b32_e32 v1, s1
; HAWAII-NEXT:    ds_write_b64 v2, v[0:1]
; HAWAII-NEXT:    s_endpgm
;
; FIJI-LABEL: local_store_i65:
; FIJI:       ; %bb.0:
; FIJI-NEXT:    s_load_dword s2, s[4:5], 0x0
; FIJI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; FIJI-NEXT:    s_load_dword s3, s[4:5], 0x10
; FIJI-NEXT:    s_mov_b32 m0, -1
; FIJI-NEXT:    s_waitcnt lgkmcnt(0)
; FIJI-NEXT:    v_mov_b32_e32 v2, s2
; FIJI-NEXT:    s_and_b32 s3, s3, 1
; FIJI-NEXT:    v_mov_b32_e32 v0, s3
; FIJI-NEXT:    ds_write_b8 v2, v0 offset:8
; FIJI-NEXT:    v_mov_b32_e32 v0, s0
; FIJI-NEXT:    v_mov_b32_e32 v1, s1
; FIJI-NEXT:    ds_write_b64 v2, v[0:1]
; FIJI-NEXT:    s_endpgm
;
; GFX9-LABEL: local_store_i65:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x0
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX9-NEXT:    s_load_dword s3, s[4:5], 0x10
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s2
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    s_and_b32 s3, s3, 1
; GFX9-NEXT:    v_mov_b32_e32 v3, s3
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    ds_write_b8 v2, v3 offset:8
; GFX9-NEXT:    ds_write_b64 v2, v[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: local_store_i65:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x10
; GFX10-NEXT:    s_load_dword s3, s[4:5], 0x0
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_and_b32 s2, s2, 1
; GFX10-NEXT:    v_mov_b32_e32 v2, s3
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v3, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    ds_write_b8 v2, v3 offset:8
; GFX10-NEXT:    ds_write_b64 v2, v[0:1]
; GFX10-NEXT:    s_endpgm
  store i65 %arg, i65 addrspace(3)* %ptr, align 8
  ret void
}

; i13, non-kernel function. The expected output masks the value with 0x1fff
; and stores it with a single ds_write_b16 on every target.
define void @local_store_i13(i13 addrspace(3)* %ptr, i13 %arg) #0 {
; CIVI-LABEL: local_store_i13:
; CIVI:       ; %bb.0:
; CIVI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIVI-NEXT:    v_and_b32_e32 v1, 0x1fff, v1
; CIVI-NEXT:    s_mov_b32 m0, -1
; CIVI-NEXT:    ds_write_b16 v0, v1
; CIVI-NEXT:    s_waitcnt lgkmcnt(0)
; CIVI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: local_store_i13:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v1, 0x1fff, v1
; GFX9-NEXT:    ds_write_b16 v0, v1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: local_store_i13:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_e32 v1, 0x1fff, v1
; GFX10-NEXT:    ds_write_b16 v0, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  store i13 %arg, i13 addrspace(3)* %ptr, align 8
  ret void
}

; i17, non-kernel function. The expected output writes a b16 at offset 0 and
; one byte at offset 2 carrying bit 16. hawaii/fiji extract that bit with a
; 1-bit v_bfe_u32; gfx900/gfx1010 mask with 0x1ffff and write the byte with
; ds_write_b8_d16_hi.
define void @local_store_i17(i17 addrspace(3)* %ptr, i17 %arg) #0 {
; CIVI-LABEL: local_store_i17:
; CIVI:       ; %bb.0:
; CIVI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIVI-NEXT:    s_mov_b32 m0, -1
; CIVI-NEXT:    ds_write_b16 v0, v1
; CIVI-NEXT:    v_bfe_u32 v1, v1, 16, 1
; CIVI-NEXT:    ds_write_b8 v0, v1 offset:2
; CIVI-NEXT:    s_waitcnt lgkmcnt(0)
; CIVI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: local_store_i17:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ds_write_b16 v0, v1
; GFX9-NEXT:    v_and_b32_e32 v1, 0x1ffff, v1
; GFX9-NEXT:    ds_write_b8_d16_hi v0, v1 offset:2
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: local_store_i17:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_e32 v2, 0x1ffff, v1
; GFX10-NEXT:    ds_write_b16 v0, v1
; GFX10-NEXT:    ds_write_b8_d16_hi v0, v2 offset:2
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  store i17 %arg, i17 addrspace(3)* %ptr, align 8
  ret void
}

attributes #0 = { nounwind }