; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX900 %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX906,NO-D16-HI %s
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX803,NO-D16-HI %s

; Codegen checks for 16-bit and 8-bit stores whose data is the upper half of
; a 32-bit value. Each function takes the value as an i32 and either
; bitcasts it to a two-element vector and extracts element 1, or shifts it
; right by 16 and truncates. The gfx900 checks expect a single *_d16_hi
; store; the gfx906 and fiji checks expect a v_lshrrev_b32 by 16 followed by
; an ordinary store.

; --- addrspace(1) pointers ---------------------------------------------------

; Element 1 of <2 x i16>, no address offset.
; GCN-LABEL: {{^}}store_global_hi_v2i16:
; GCN: s_waitcnt

; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX803-NEXT: flat_store_short v[0:1], v2
; GFX906-NEXT: global_store_short v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16(i16 addrspace(1)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  store i16 %upper, i16 addrspace(1)* %out
  ret void
}

; Same as above with half elements; the expected instructions are identical.
; GCN-LABEL: {{^}}store_global_hi_v2f16:
; GCN: s_waitcnt

; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX803-NEXT: flat_store_short v[0:1], v2
; GFX906-NEXT: global_store_short v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2f16(half addrspace(1)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %vec = bitcast i32 %arg to <2 x half>
  %upper = extractelement <2 x half> %vec, i32 1
  store half %upper, half addrspace(1)* %out
  ret void
}

; Upper half produced by an explicit lshr + trunc instead of a vector extract.
; GCN-LABEL: {{^}}store_global_hi_i32_shift:
; GCN: s_waitcnt

; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX803-NEXT: flat_store_short v[0:1], v2
; GFX906-NEXT: global_store_short v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_i32_shift(i16 addrspace(1)* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %upper = trunc i32 %shifted to i16
  store i16 %upper, i16 addrspace(1)* %out
  ret void
}

; Byte store of the truncated upper half (vector-extract form).
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX803-NEXT: flat_store_byte v[0:1], v2
; GFX906-NEXT: global_store_byte v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_i8(i8 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %byte = trunc i16 %upper to i8
  store i8 %byte, i8 addrspace(1)* %out
  ret void
}

; Byte store of the truncated upper half (shift form).
; GCN-LABEL: {{^}}store_global_hi_i8_shift:
; GCN: s_waitcnt

; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX803-NEXT: flat_store_byte v[0:1], v2
; GFX906-NEXT: global_store_byte v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_i8_shift(i8 addrspace(1)* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %upper = trunc i32 %shifted to i8
  store i8 %upper, i8 addrspace(1)* %out
  ret void
}

; i16 element index 2047 is byte offset 4094; the gfx900 check expects it as
; an immediate offset, while the fiji checks expect a 64-bit add first.
; GCN-LABEL: {{^}}store_global_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094

; GFX803-DAG: v_add_u32_e32
; GFX803-DAG: v_addc_u32_e32
; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803: flat_store_short v[0:1], v2{{$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: global_store_short v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_max_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %addr = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 2047
  store i16 %upper, i16 addrspace(1)* %addr
  ret void
}

; i16 element index -2048 is byte offset -4096.
; GCN-LABEL: {{^}}store_global_hi_v2i16_min_offset:
; GCN: s_waitcnt
; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}

; GFX803-DAG: v_add_u32_e32
; GFX803-DAG: v_addc_u32_e32
; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803: flat_store_short v[0:1], v{{[0-9]$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: global_store_short v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_min_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %addr = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 -2048
  store i16 %upper, i16 addrspace(1)* %addr
  ret void
}

; Byte store at byte offset 4095.
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_max_offset:
; GCN: s_waitcnt
; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095

; GFX803-DAG: v_add_u32_e32
; GFX803-DAG: v_addc_u32_e32
; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803: flat_store_byte v[0:1], v{{[0-9]$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: global_store_byte v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_i8_max_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %byte = trunc i16 %upper to i8
  %addr = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 4095
  store i8 %byte, i8 addrspace(1)* %addr
  ret void
}

; Byte store at byte offset -4095.
; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_min_offset:
; GCN: s_waitcnt
; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095

; GFX803-DAG: v_add_u32_e32
; GFX803-DAG: v_addc_u32_e32
; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803: flat_store_byte v[0:1], v{{[0-9]$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: global_store_byte v[0:1], v2, off

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_global_hi_v2i16_i8_min_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %byte = trunc i16 %upper to i8
  %addr = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 -4095
  store i8 %byte, i8 addrspace(1)* %addr
  ret void
}

; --- pointers without an address space qualifier ----------------------------

; Element 1 of <2 x i16>, no address offset.
; GCN-LABEL: {{^}}store_flat_hi_v2i16:
; GCN: s_waitcnt

; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; NO-D16-HI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16(i16* %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  store i16 %upper, i16* %out
  ret void
}

; Half-element variant.
; GCN-LABEL: {{^}}store_flat_hi_v2f16:
; GCN: s_waitcnt

; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; NO-D16-HI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2f16(half* %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x half>
  %upper = extractelement <2 x half> %vec, i32 1
  store half %upper, half* %out
  ret void
}

; Shift + trunc variant.
; GCN-LABEL: {{^}}store_flat_hi_i32_shift:
; GCN: s_waitcnt

; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; NO-D16-HI-NEXT: flat_store_short v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_i32_shift(i16* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %upper = trunc i32 %shifted to i16
  store i16 %upper, i16* %out
  ret void
}

; Byte store, vector-extract form.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; NO-D16-HI-NEXT: flat_store_byte v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_i8(i8* %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %byte = trunc i16 %upper to i8
  store i8 %byte, i8* %out
  ret void
}

; Byte store, shift form.
; GCN-LABEL: {{^}}store_flat_hi_i8_shift:
; GCN: s_waitcnt

; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; NO-D16-HI-NEXT: flat_store_byte v[0:1], v2

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_i8_shift(i8* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %upper = trunc i32 %shifted to i8
  store i8 %upper, i8* %out
  ret void
}

; Byte offset 4094: the gfx900 and gfx906 checks expect an immediate offset on
; the flat store; the fiji checks expect an explicit 64-bit add.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: flat_store_short v[0:1], v2 offset:4094

; GFX803-DAG: v_add_u32_e32
; GFX803-DAG: v_addc_u32_e32
; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803: flat_store_short v[0:1], v2{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_max_offset(i16* %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %addr = getelementptr inbounds i16, i16* %out, i64 2047
  store i16 %upper, i16* %addr
  ret void
}

; Negative element index (-1023): every target is expected to compute the
; address with an add / add-with-carry pair and store without an offset.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset:
; GCN: s_waitcnt
; GCN: v_add{{(_co)?}}_{{i|u}}32_e32

; GFX803: v_addc_u32_e32
; GFX900: v_addc_co_u32_e32

; GFX906-NEXT: v_lshrrev_b32_e32
; GFX906-NEXT: v_addc_co_u32_e32
; GFX906: flat_store_short v[0:1], v2

; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
; GFX803: flat_store_short v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_neg_offset(i16* %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %addr = getelementptr inbounds i16, i16* %out, i64 -1023
  store i16 %upper, i16* %addr
  ret void
}

; Byte store at byte offset 4095.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_max_offset:
; GCN: s_waitcnt
; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}

; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803-DAG: v_add_u32_e32
; GFX803-DAG: v_addc_u32_e32
; GFX803: flat_store_byte v[0:1], v2{{$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: flat_store_byte v[0:1], v2 offset:4095{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_i8_max_offset(i8* %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %byte = trunc i16 %upper to i8
  %addr = getelementptr inbounds i8, i8* %out, i64 4095
  store i8 %byte, i8* %addr
  ret void
}

; Byte store at byte offset -4095; explicit address arithmetic is expected on
; every target.
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset:
; GCN: s_waitcnt
; GCN-DAG: v_add{{(_co)?}}_{{i|u}}32_e32

; GFX803-DAG: v_addc_u32_e32
; GFX900-DAG: v_addc_co_u32_e32
; GFX906-DAG: v_add_co_u32_e32

; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}

; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX906-NEXT: v_addc_co_u32_e32
; GFX906-NEXT: flat_store_byte v[0:1], v2{{$}}

; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
; GFX803: flat_store_byte v[0:1], v2{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_flat_hi_v2i16_i8_neg_offset(i8* %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %byte = trunc i16 %upper to i8
  %addr = getelementptr inbounds i8, i8* %out, i64 -4095
  store i8 %byte, i8* %addr
  ret void
}

; --- addrspace(5) pointers ---------------------------------------------------

; Element 1 of <2 x i16> through a pointer argument; a buffer store with
; "offen" addressing is expected.
; GCN-LABEL: {{^}}store_private_hi_v2i16:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16(i16 addrspace(5)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  store i16 %upper, i16 addrspace(5)* %out
  ret void
}

; Half-element variant.
; GCN-LABEL: {{^}}store_private_hi_v2f16:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2f16(half addrspace(5)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %vec = bitcast i32 %arg to <2 x half>
  %upper = extractelement <2 x half> %vec, i32 1
  store half %upper, half addrspace(5)* %out
  ret void
}

; Shift + trunc variant.
; GCN-LABEL: {{^}}store_private_hi_i32_shift:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI-NEXT: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_i32_shift(i16 addrspace(5)* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %upper = trunc i32 %shifted to i16
  store i16 %upper, i16 addrspace(5)* %out
  ret void
}

; Byte store, vector-extract form.
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_i8(i8 addrspace(5)* %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %byte = trunc i16 %upper to i8
  store i8 %byte, i8 addrspace(5)* %out
  ret void
}

; Byte store, shift form.
; GCN-LABEL: {{^}}store_private_hi_i8_shift:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_i8_shift(i8 addrspace(5)* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %upper = trunc i32 %shifted to i8
  store i8 %upper, i8 addrspace(5)* %out
  ret void
}

; Store into a byval argument at i16 element index 2045 (4090 bytes); the
; checks expect offset:4094 on the instruction.
; NOTE(review): presumably the byval object starts 4 bytes into the frame,
; which would account for the extra 4 - confirm against the calling convention.
; GCN-LABEL: {{^}}store_private_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX900: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v0, 16, v0
; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], s5 offset:4094{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_max_offset(i16 addrspace(5)* byval %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %addr = getelementptr inbounds i16, i16 addrspace(5)* %out, i64 2045
  store i16 %upper, i16 addrspace(5)* %addr
  ret void
}



; Volatile store to the null addrspace(5) pointer; the store is expected to
; carry neither a VGPR address nor an immediate offset.
; GCN-LABEL: {{^}}store_private_hi_v2i16_nooff:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s4{{$}}

; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], s4{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_nooff(i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  store volatile i16 %upper, i16 addrspace(5)* null
  ret void
}


; Byte variant of the null-pointer volatile store.
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_nooff:
; GCN: s_waitcnt

; GFX900-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s4{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v0, 16, v0
; NO-D16-HI: buffer_store_byte v0, off, s[0:3], s4{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_private_hi_v2i16_i8_nooff(i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %byte = trunc i16 %upper to i8
  store volatile i8 %byte, i8 addrspace(5)* null
  ret void
}

; --- addrspace(3) pointers ---------------------------------------------------

; Element 1 of <2 x i16>; a ds_write is expected.
; GCN-LABEL: {{^}}store_local_hi_v2i16:
; GCN: s_waitcnt

; GFX900-NEXT: ds_write_b16_d16_hi v0, v1{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: ds_write_b16 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2i16(i16 addrspace(3)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  store i16 %upper, i16 addrspace(3)* %out
  ret void
}

; Half-element variant.
; GCN-LABEL: {{^}}store_local_hi_v2f16:
; GCN: s_waitcnt

; GFX900-NEXT: ds_write_b16_d16_hi v0, v1{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: ds_write_b16 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2f16(half addrspace(3)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %vec = bitcast i32 %arg to <2 x half>
  %upper = extractelement <2 x half> %vec, i32 1
  store half %upper, half addrspace(3)* %out
  ret void
}

; Shift + trunc variant.
; GCN-LABEL: {{^}}store_local_hi_i32_shift:
; GCN: s_waitcnt

; GFX900-NEXT: ds_write_b16_d16_hi v0, v1{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: ds_write_b16 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_i32_shift(i16 addrspace(3)* %out, i32 %value) #0 {
entry:
  %shifted = lshr i32 %value, 16
  %upper = trunc i32 %shifted to i16
  store i16 %upper, i16 addrspace(3)* %out
  ret void
}

; Byte store, vector-extract form.
; GCN-LABEL: {{^}}store_local_hi_v2i16_i8:
; GCN: s_waitcnt

; GFX900-NEXT: ds_write_b8_d16_hi v0, v1{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: ds_write_b8 v0, v1

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2i16_i8(i8 addrspace(3)* %out, i32 %arg) #0 {
entry:
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %byte = trunc i16 %upper to i8
  store i8 %byte, i8 addrspace(3)* %out
  ret void
}

; i16 element index 32767 is byte offset 65534, expected as an immediate
; offset on every target.
; GCN-LABEL: {{^}}store_local_hi_v2i16_max_offset:
; GCN: s_waitcnt
; GFX900-NEXT: ds_write_b16_d16_hi v0, v1 offset:65534{{$}}

; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
; NO-D16-HI: ds_write_b16 v0, v1 offset:65534{{$}}

; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @store_local_hi_v2i16_max_offset(i16 addrspace(3)* %out, i32 %arg) #0 {
entry:
  ; FIXME: ABI for pre-gfx9
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %addr = getelementptr inbounds i16, i16 addrspace(3)* %out, i64 32767
  store i16 %upper, i16 addrspace(3)* %addr
  ret void
}

; --- stores into addrspace(5) allocas (gfx900 checks only) -------------------

; Two allocas: a [10 x i32] that receives a volatile dword store, then a
; [4096 x i16] whose element 2025 (4050 bytes in) receives the upper half.
; The check expects offset:4094.
; NOTE(review): presumably the remaining 44 bytes come from the frame layout
; of the first alloca - confirm.
; GCN-LABEL: {{^}}store_private_hi_v2i16_to_offset:
; GCN: s_waitcnt
; GFX900: buffer_store_dword
; GFX900-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094
define void @store_private_hi_v2i16_to_offset(i32 %arg) #0 {
entry:
  %pad = alloca [10 x i32], align 4, addrspace(5)
  %buf = alloca [4096 x i16], align 2, addrspace(5)
  %pad.i32 = bitcast [10 x i32] addrspace(5)* %pad to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %pad.i32
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %addr = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %buf, i32 0, i32 2025
  store i16 %upper, i16 addrspace(5)* %addr
  ret void
}

; Byte variant: element 4051 of a [4096 x i8] alloca; the check expects
; offset:4095.
; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_to_offset:
; GCN: s_waitcnt
; GFX900: buffer_store_dword
; GFX900-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s5 offset:4095
define void @store_private_hi_v2i16_i8_to_offset(i32 %arg) #0 {
entry:
  %pad = alloca [10 x i32], align 4, addrspace(5)
  %buf = alloca [4096 x i8], align 2, addrspace(5)
  %pad.i32 = bitcast [10 x i32] addrspace(5)* %pad to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %pad.i32
  %vec = bitcast i32 %arg to <2 x i16>
  %upper = extractelement <2 x i16> %vec, i32 1
  %addr = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %buf, i32 0, i32 4051
  %byte = trunc i16 %upper to i8
  store i8 %byte, i8 addrspace(5)* %addr
  ret void
}

attributes #0 = { nounwind }