; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,GFX89,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX89,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s

; Testing for ds_read/write_b128
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s

; The kernels below load an i16 or <N x i16> value through an addrspace(3)
; pointer (%in) and store it, optionally zero- or sign-extended, through
; another addrspace(3) pointer (%out). Each test in this part expects an
; s_mov_b32 to m0 under the SICIVI prefix and no mention of m0 under the GFX9
; prefix.

; ---- Plain loads: the loaded value is stored back unchanged. ----

; FUNC-LABEL: {{^}}local_load_i16:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_u16 v{{[0-9]+}}

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_SHORT_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) {
entry:
  %ld = load i16, i16 addrspace(3)* %in
  store i16 %ld, i16 addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v2i16:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b32

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %ld = load <2 x i16>, <2 x i16> addrspace(3)* %in
  store <2 x i16> %ld, <2 x i16> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v3i16:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b64
; GCN-DAG: ds_write_b32
; GCN-DAG: ds_write_b16

; EG-DAG: LDS_USHORT_READ_RET
; EG-DAG: LDS_USHORT_READ_RET
define amdgpu_kernel void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
  store <3 x i16> %ld, <3 x i16> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v4i16:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %ld = load <4 x i16>, <4 x i16> addrspace(3)* %in
  store <4 x i16> %ld, <4 x i16> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v8i16:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) {
entry:
  %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in
  store <8 x i16> %ld, <8 x i16> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v16i16:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}


; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) {
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(3)* %in
  store <16 x i16> %ld, <16 x i16> addrspace(3)* %out
  ret void
}

; ---- Extending loads to i32: zext/sext of the loaded i16 value(s). ----

; FUNC-LABEL: {{^}}local_zextload_i16_to_i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_u16
; GCN: ds_write_b32

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = zext i16 %a to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_i16_to_i32:
; GCN-NOT: s_wqm_b64

; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_i16

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = sext i16 %a to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_u16

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = zext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_i16

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = sext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = zext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b32

; EG: LDS_READ_RET
; EG: BFE_INT
; EG: BFE_INT
define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = sext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}

; NOTE(review): the next three kernels are named with a doubled "local_local_"
; prefix while every other kernel here uses a single "local_". It looks like a
; typo, but the names are kept because the label checks must match the symbol.

; FUNC-LABEL: {{^}}local_local_zextload_v3i16_to_v3i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b64
; SI-DAG: ds_write_b32
; SI-DAG: ds_write_b64
; CIVI-DAG: ds_write_b96
; GFX9-DAG: ds_write_b96

; EG: LDS_USHORT_READ_RET
; EG: LDS_USHORT_READ_RET
; EG: LDS_USHORT_READ_RET
define amdgpu_kernel void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
  %ext = zext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_local_sextload_v3i16_to_v3i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b64
; SI-DAG: ds_write_b32
; SI-DAG: ds_write_b64
; CIVI-DAG: ds_write_b96
; GFX9-DAG: ds_write_b96

; EG: LDS_USHORT_READ_RET
; EG: LDS_USHORT_READ_RET
; EG: LDS_USHORT_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
  %ext = sext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_local_zextload_v4i16_to_v4i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %ext = zext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b64

298; EG: LDS_READ_RET 299; EG: LDS_READ_RET 300; EG-DAG: BFE_INT 301; EG-DAG: BFE_INT 302; EG-DAG: BFE_INT 303; EG-DAG: BFE_INT 304define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { 305 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in 306 %ext = sext <4 x i16> %load to <4 x i32> 307 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out 308 ret void 309} 310 311; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32: 312; GFX9-NOT: m0 313; SICIVI: s_mov_b32 m0 314 315; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 316 317; EG: LDS_READ_RET 318; EG: LDS_READ_RET 319; EG: LDS_READ_RET 320; EG: LDS_READ_RET 321define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { 322 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in 323 %ext = zext <8 x i16> %load to <8 x i32> 324 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out 325 ret void 326} 327 328; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32: 329; GFX9-NOT: m0 330; SICIVI: s_mov_b32 m0 331 332; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 333 334; EG: LDS_READ_RET 335; EG: LDS_READ_RET 336; EG: LDS_READ_RET 337; EG: LDS_READ_RET 338; EG-DAG: BFE_INT 339; EG-DAG: BFE_INT 340; EG-DAG: BFE_INT 341; EG-DAG: BFE_INT 342; EG-DAG: BFE_INT 343; EG-DAG: BFE_INT 344; EG-DAG: BFE_INT 345; EG-DAG: BFE_INT 346define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { 347 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in 348 %ext = sext <8 x i16> %load to <8 x i32> 349 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out 350 ret void 351} 352 353; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32: 354; GFX9-NOT: m0 355; SICIVI: s_mov_b32 m0 356 357; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 358; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 
offset1:3{{$}} 359 360; GCN: ds_write2_b64 361; GCN: ds_write2_b64 362; GCN: ds_write2_b64 363; GCN: ds_write2_b64 364 365; EG: LDS_READ_RET 366; EG: LDS_READ_RET 367; EG: LDS_READ_RET 368; EG: LDS_READ_RET 369; EG: LDS_READ_RET 370; EG: LDS_READ_RET 371; EG: LDS_READ_RET 372; EG: LDS_READ_RET 373define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { 374 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in 375 %ext = zext <16 x i16> %load to <16 x i32> 376 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out 377 ret void 378} 379 380; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32: 381; GFX9-NOT: m0 382; SICIVI: s_mov_b32 m0 383 384 385; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 386; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} 387 388; EG: LDS_READ_RET 389; EG: LDS_READ_RET 390; EG: LDS_READ_RET 391; EG: LDS_READ_RET 392; EG: LDS_READ_RET 393; EG: LDS_READ_RET 394; EG: LDS_READ_RET 395; EG: LDS_READ_RET 396; EG-DAG: BFE_INT 397; EG-DAG: BFE_INT 398; EG-DAG: BFE_INT 399; EG-DAG: BFE_INT 400; EG-DAG: BFE_INT 401; EG-DAG: BFE_INT 402; EG-DAG: BFE_INT 403; EG-DAG: BFE_INT 404; EG-DAG: BFE_INT 405; EG-DAG: BFE_INT 406; EG-DAG: BFE_INT 407; EG-DAG: BFE_INT 408; EG-DAG: BFE_INT 409; EG-DAG: BFE_INT 410; EG-DAG: BFE_INT 411; EG-DAG: BFE_INT 412define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { 413 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in 414 %ext = sext <16 x i16> %load to <16 x i32> 415 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out 416 ret void 417} 418 419; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32: 420; GFX9-NOT: m0 421; SICIVI: s_mov_b32 m0 422 423; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 424; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3 425; GCN-DAG: ds_read2_b64 
v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 426; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 427 428; EG: LDS_READ_RET 429; EG: LDS_READ_RET 430; EG: LDS_READ_RET 431; EG: LDS_READ_RET 432; EG: LDS_READ_RET 433; EG: LDS_READ_RET 434; EG: LDS_READ_RET 435; EG: LDS_READ_RET 436; EG: LDS_READ_RET 437; EG: LDS_READ_RET 438; EG: LDS_READ_RET 439; EG: LDS_READ_RET 440; EG: LDS_READ_RET 441; EG: LDS_READ_RET 442; EG: LDS_READ_RET 443; EG: LDS_READ_RET 444define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { 445 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in 446 %ext = zext <32 x i16> %load to <32 x i32> 447 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out 448 ret void 449} 450 451; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32: 452; GFX9-NOT: m0 453; SICIVI: s_mov_b32 m0 454 455; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} 456; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 457; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 458; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 459; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 460; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13 461; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11 462; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9 463; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7 464; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5 465; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 
offset1:3 466; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1 467 468; EG: LDS_READ_RET 469; EG: LDS_READ_RET 470; EG: LDS_READ_RET 471; EG: LDS_READ_RET 472; EG: LDS_READ_RET 473; EG: LDS_READ_RET 474; EG: LDS_READ_RET 475; EG: LDS_READ_RET 476; EG: LDS_READ_RET 477; EG: LDS_READ_RET 478; EG: LDS_READ_RET 479; EG: LDS_READ_RET 480; EG: LDS_READ_RET 481; EG: LDS_READ_RET 482; EG: LDS_READ_RET 483; EG: LDS_READ_RET 484define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { 485 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in 486 %ext = sext <32 x i16> %load to <32 x i32> 487 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out 488 ret void 489} 490 491; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32: 492; GFX9-NOT: m0 493; SICIVI: s_mov_b32 m0 494 495; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:14 offset1:15 496; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 497; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3 498; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 499; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 500; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:8 offset1:9 501; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:12 offset1:13 502; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:10 offset1:11 503; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31 504; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29 505; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:26 offset1:27 506; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:24 offset1:25 507; GCN-DAG: 
ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:22 offset1:23 508; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:20 offset1:21 509; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:18 offset1:19 510; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:16 offset1:17 511; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 512; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13 513; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11 514; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9 515; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7 516; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5 517; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 518; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1 519 520; EG: LDS_READ_RET 521; EG: LDS_READ_RET 522; EG: LDS_READ_RET 523; EG: LDS_READ_RET 524; EG: LDS_READ_RET 525; EG: LDS_READ_RET 526; EG: LDS_READ_RET 527; EG: LDS_READ_RET 528; EG: LDS_READ_RET 529; EG: LDS_READ_RET 530; EG: LDS_READ_RET 531; EG: LDS_READ_RET 532; EG: LDS_READ_RET 533; EG: LDS_READ_RET 534; EG: LDS_READ_RET 535; EG: LDS_READ_RET 536; EG: LDS_READ_RET 537; EG: LDS_READ_RET 538; EG: LDS_READ_RET 539; EG: LDS_READ_RET 540; EG: LDS_READ_RET 541; EG: LDS_READ_RET 542; EG: LDS_READ_RET 543; EG: LDS_READ_RET 544; EG: LDS_READ_RET 545; EG: LDS_READ_RET 546; EG: LDS_READ_RET 547; EG: LDS_READ_RET 548; EG: LDS_READ_RET 549; EG: LDS_READ_RET 550; EG: LDS_READ_RET 551; EG: LDS_READ_RET 
552define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { 553 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in 554 %ext = zext <64 x i16> %load to <64 x i32> 555 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out 556 ret void 557} 558 559; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32: 560; GFX9-NOT: m0 561; SICIVI: s_mov_b32 m0 562 563; EG: LDS_READ_RET 564; EG: LDS_READ_RET 565; EG: LDS_READ_RET 566; EG: LDS_READ_RET 567; EG: LDS_READ_RET 568; EG: LDS_READ_RET 569; EG: LDS_READ_RET 570; EG: LDS_READ_RET 571; EG: LDS_READ_RET 572; EG: LDS_READ_RET 573; EG: LDS_READ_RET 574; EG: LDS_READ_RET 575; EG: LDS_READ_RET 576; EG: LDS_READ_RET 577; EG: LDS_READ_RET 578; EG: LDS_READ_RET 579; EG: LDS_READ_RET 580; EG: LDS_READ_RET 581; EG: LDS_READ_RET 582; EG: LDS_READ_RET 583; EG: LDS_READ_RET 584; EG: LDS_READ_RET 585; EG: LDS_READ_RET 586; EG: LDS_READ_RET 587; EG: LDS_READ_RET 588; EG: LDS_READ_RET 589; EG: LDS_READ_RET 590; EG: LDS_READ_RET 591; EG: LDS_READ_RET 592; EG: LDS_READ_RET 593; EG: LDS_READ_RET 594; EG: LDS_READ_RET 595define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { 596 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in 597 %ext = sext <64 x i16> %load to <64 x i32> 598 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out 599 ret void 600} 601 602; FUNC-LABEL: {{^}}local_zextload_i16_to_i64: 603; GFX9-NOT: m0 604; SICIVI: s_mov_b32 m0 605 606; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]], 607; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} 608 609; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]] 610 611; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 612; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 613; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 614; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 615; EG-DAG: LDS_WRITE 616define amdgpu_kernel void @local_zextload_i16_to_i64(i64 
addrspace(3)* %out, i16 addrspace(3)* %in) #0 { 617 %a = load i16, i16 addrspace(3)* %in 618 %ext = zext i16 %a to i64 619 store i64 %ext, i64 addrspace(3)* %out 620 ret void 621} 622 623; FUNC-LABEL: {{^}}local_sextload_i16_to_i64: 624; GFX9-NOT: m0 625; SICIVI: s_mov_b32 m0 626 627; FIXME: Need to optimize this sequence to avoid an extra shift. 628; t25: i32,ch = load<LD2[%in(addrspace=3)], anyext from i16> t12, t10, undef:i32 629; t28: i64 = any_extend t25 630; t30: i64 = sign_extend_inreg t28, ValueType:ch:i16 631; SI: ds_read_i16 v[[LO:[0-9]+]], 632; GFX89: ds_read_u16 v[[ULO:[0-9]+]] 633; GFX89: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16 634; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] 635 636; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]] 637 638; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 639; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 640; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP 641; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 642; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal 643; EG-DAG: LDS_WRITE 644; EG-DAG: 16 645; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 646define amdgpu_kernel void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { 647 %a = load i16, i16 addrspace(3)* %in 648 %ext = sext i16 %a to i64 649 store i64 %ext, i64 addrspace(3)* %out 650 ret void 651} 652 653; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64: 654; GFX9-NOT: m0 655; SICIVI: s_mov_b32 m0 656 657 658; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 659; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 660; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 661; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 662; EG-DAG: LDS_WRITE 663define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { 664 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in 665 %ext = zext <1 x i16> %load to <1 x i64> 666 store <1 x i64> %ext, <1 x 
i64> addrspace(3)* %out 667 ret void 668} 669 670; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64: 671; GFX9-NOT: m0 672; SICIVI: s_mov_b32 m0 673 674 675; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 676; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 677; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP 678; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 679; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal 680; EG-DAG: LDS_WRITE 681; EG-DAG: 16 682; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 683define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { 684 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in 685 %ext = sext <1 x i16> %load to <1 x i64> 686 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out 687 ret void 688} 689 690; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64: 691; GFX9-NOT: m0 692; SICIVI: s_mov_b32 m0 693 694 695; EG: LDS_READ_RET 696define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { 697 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in 698 %ext = zext <2 x i16> %load to <2 x i64> 699 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out 700 ret void 701} 702 703; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64: 704; GFX9-NOT: m0 705; SICIVI: s_mov_b32 m0 706 707 708; EG: LDS_READ_RET 709; EG-DAG: BFE_INT 710; EG-DAG: ASHR 711define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { 712 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in 713 %ext = sext <2 x i16> %load to <2 x i64> 714 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out 715 ret void 716} 717 718; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64: 719; GFX9-NOT: m0 720; SICIVI: s_mov_b32 m0 721 722 723; EG: LDS_READ_RET 724; EG: LDS_READ_RET 725define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { 
726 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in 727 %ext = zext <4 x i16> %load to <4 x i64> 728 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out 729 ret void 730} 731 732; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64: 733; GFX9-NOT: m0 734; SICIVI: s_mov_b32 m0 735 736 737; EG: LDS_READ_RET 738; EG: LDS_READ_RET 739; EG-DAG: BFE_INT 740; EG-DAG: BFE_INT 741; EG-DAG: ASHR 742; EG-DAG: ASHR 743define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { 744 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in 745 %ext = sext <4 x i16> %load to <4 x i64> 746 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out 747 ret void 748} 749 750; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64: 751; GFX9-NOT: m0 752; SICIVI: s_mov_b32 m0 753 754 755; EG: LDS_READ_RET 756; EG: LDS_READ_RET 757; EG: LDS_READ_RET 758; EG: LDS_READ_RET 759define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { 760 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in 761 %ext = zext <8 x i16> %load to <8 x i64> 762 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out 763 ret void 764} 765 766; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64: 767; GFX9-NOT: m0 768; SICIVI: s_mov_b32 m0 769 770 771; EG: LDS_READ_RET 772; EG: LDS_READ_RET 773; EG: LDS_READ_RET 774; EG: LDS_READ_RET 775; EG-DAG: BFE_INT 776; EG-DAG: BFE_INT 777; EG-DAG: ASHR 778; EG-DAG: ASHR 779; EG-DAG: BFE_INT 780; EG-DAG: BFE_INT 781; EG-DAG: ASHR 782; EG-DAG: ASHR 783define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { 784 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in 785 %ext = sext <8 x i16> %load to <8 x i64> 786 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out 787 ret void 788} 789 790; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64: 791; GFX9-NOT: m0 792; SICIVI: s_mov_b32 m0 793 794 795; EG: LDS_READ_RET 796; EG: 
LDS_READ_RET 797; EG: LDS_READ_RET 798; EG: LDS_READ_RET 799; EG: LDS_READ_RET 800; EG: LDS_READ_RET 801; EG: LDS_READ_RET 802; EG: LDS_READ_RET 803define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { 804 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in 805 %ext = zext <16 x i16> %load to <16 x i64> 806 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out 807 ret void 808} 809 810; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64: 811; GFX9-NOT: m0 812; SICIVI: s_mov_b32 m0 813 814 815; EG: LDS_READ_RET 816; EG: LDS_READ_RET 817; EG: LDS_READ_RET 818; EG: LDS_READ_RET 819; EG: LDS_READ_RET 820; EG: LDS_READ_RET 821; EG: LDS_READ_RET 822; EG: LDS_READ_RET 823; EG-DAG: BFE_INT 824; EG-DAG: BFE_INT 825; EG-DAG: ASHR 826; EG-DAG: ASHR 827; EG-DAG: BFE_INT 828; EG-DAG: BFE_INT 829; EG-DAG: ASHR 830; EG-DAG: ASHR 831; EG-DAG: BFE_INT 832; EG-DAG: BFE_INT 833; EG-DAG: ASHR 834; EG-DAG: ASHR 835; EG-DAG: BFE_INT 836; EG-DAG: BFE_INT 837; EG-DAG: ASHR 838; EG-DAG: ASHR 839define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { 840 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in 841 %ext = sext <16 x i16> %load to <16 x i64> 842 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out 843 ret void 844} 845 846; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64: 847; GFX9-NOT: m0 848; SICIVI: s_mov_b32 m0 849 850 851; EG: LDS_READ_RET 852; EG: LDS_READ_RET 853; EG: LDS_READ_RET 854; EG: LDS_READ_RET 855; EG: LDS_READ_RET 856; EG: LDS_READ_RET 857; EG: LDS_READ_RET 858; EG: LDS_READ_RET 859; EG: LDS_READ_RET 860; EG: LDS_READ_RET 861; EG: LDS_READ_RET 862; EG: LDS_READ_RET 863; EG: LDS_READ_RET 864; EG: LDS_READ_RET 865; EG: LDS_READ_RET 866; EG: LDS_READ_RET 867define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { 868 %load = load <32 x i16>, <32 x i16> 
addrspace(3)* %in 869 %ext = zext <32 x i16> %load to <32 x i64> 870 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out 871 ret void 872} 873 874; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64: 875; GFX9-NOT: m0 876; SICIVI: s_mov_b32 m0 877 878 879; EG: LDS_READ_RET 880; EG: LDS_READ_RET 881; EG: LDS_READ_RET 882; EG: LDS_READ_RET 883; EG: LDS_READ_RET 884; EG: LDS_READ_RET 885; EG: LDS_READ_RET 886; EG: LDS_READ_RET 887; EG: LDS_READ_RET 888; EG: LDS_READ_RET 889; EG: LDS_READ_RET 890; EG: LDS_READ_RET 891; EG: LDS_READ_RET 892; EG: LDS_READ_RET 893; EG: LDS_READ_RET 894; EG: LDS_READ_RET 895; EG-DAG: BFE_INT 896; EG-DAG: BFE_INT 897; EG-DAG: ASHR 898; EG-DAG: ASHR 899; EG-DAG: BFE_INT 900; EG-DAG: BFE_INT 901; EG-DAG: ASHR 902; EG-DAG: ASHR 903; EG-DAG: BFE_INT 904; EG-DAG: BFE_INT 905; EG-DAG: ASHR 906; EG-DAG: ASHR 907; EG-DAG: BFE_INT 908; EG-DAG: BFE_INT 909; EG-DAG: ASHR 910; EG-DAG: ASHR 911; EG-DAG: BFE_INT 912; EG-DAG: BFE_INT 913; EG-DAG: ASHR 914; EG-DAG: ASHR 915; EG-DAG: BFE_INT 916; EG-DAG: BFE_INT 917; EG-DAG: ASHR 918; EG-DAG: ASHR 919; EG-DAG: BFE_INT 920; EG-DAG: BFE_INT 921; EG-DAG: ASHR 922; EG-DAG: ASHR 923; EG-DAG: BFE_INT 924; EG-DAG: BFE_INT 925; EG-DAG: ASHR 926; EG-DAG: ASHR 927define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { 928 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in 929 %ext = sext <32 x i16> %load to <32 x i64> 930 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out 931 ret void 932} 933 934; ; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64: 935; define amdgpu_kernel void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { 936; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in 937; %ext = zext <64 x i16> %load to <64 x i64> 938; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out 939; ret void 940; } 941 942; ; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64: 943; define 
amdgpu_kernel void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { 944; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in 945; %ext = sext <64 x i16> %load to <64 x i64> 946; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out 947; ret void 948; } 949 950; Tests if ds_read/write_b128 gets generated for the 16 byte aligned load. 951; FUNC-LABEL: {{^}}local_v8i16_to_128: 952 953; SI-NOT: ds_read_b128 954; SI-NOT: ds_write_b128 955 956; CIVI: ds_read_b128 957; CIVI: ds_write_b128 958 959; EG: LDS_READ_RET 960; EG: LDS_READ_RET 961; EG: LDS_READ_RET 962; EG: LDS_READ_RET 963define amdgpu_kernel void @local_v8i16_to_128(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) { 964 %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in, align 16 965 store <8 x i16> %ld, <8 x i16> addrspace(3)* %out, align 16 966 ret void 967} 968 969attributes #0 = { nounwind } 970