; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK %s
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s

; Each test case below loads a value through the %in pointer, optionally
; extends it, and stores the result through %out, which is always an
; addrspace(1) pointer. The directives in front of each function list the
; instructions expected in the llc output for the two check prefixes used
; by the RUN lines above.

;===------------------------------------------------------------------------===;
; GLOBAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load an i8 value from the global address space.
; R600-CHECK-LABEL: @load_i8
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}

; SI-CHECK-LABEL: @load_i8
; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %1 = load i8 addrspace(1)* %in
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load an i8 value from the global address space and sign-extend it to i32.
; R600-CHECK-LABEL: @load_i8_sext
; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
; SI-CHECK-LABEL: @load_i8_sext
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %0 = load i8 addrspace(1)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the global address space and zero-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v2i8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; SI-CHECK-LABEL: @load_v2i8
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %0 = load <2 x i8> addrspace(1)* %in
  %1 = zext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the global address space and sign-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v2i8_sext
; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 24
; SI-CHECK-LABEL: @load_v2i8_sext
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %0 = load <2 x i8> addrspace(1)* %in
  %1 = sext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the global address space and zero-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v4i8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; SI-CHECK-LABEL: @load_v4i8
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %0 = load <4 x i8> addrspace(1)* %in
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the global address space and sign-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v4i8_sext
; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 24
; SI-CHECK-LABEL: @load_v4i8_sext
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %0 = load <4 x i8> addrspace(1)* %in
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i16 value from the global address space.
; R600-CHECK-LABEL: @load_i16
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK-LABEL: @load_i16
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %0 = load i16 addrspace(1)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an i16 value from the global address space and sign-extend it to i32.
; R600-CHECK-LABEL: @load_i16_sext
; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
; SI-CHECK-LABEL: @load_i16_sext
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %0 = load i16 addrspace(1)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the global address space and zero-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v2i16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; SI-CHECK-LABEL: @load_v2i16
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %0 = load <2 x i16> addrspace(1)* %in
  %1 = zext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the global address space and sign-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v2i16_sext
; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 16
; SI-CHECK-LABEL: @load_v2i16_sext
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %0 = load <2 x i16> addrspace(1)* %in
  %1 = sext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the global address space and zero-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v4i16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; SI-CHECK-LABEL: @load_v4i16
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %0 = load <4 x i16> addrspace(1)* %in
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the global address space and sign-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v4i16_sext
; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 16
; SI-CHECK-LABEL: @load_v4i16_sext
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %0 = load <4 x i16> addrspace(1)* %in
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space.
; R600-CHECK-LABEL: @load_i32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK-LABEL: @load_i32
; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32 addrspace(1)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Load a f32 value from the global address space.
; R600-CHECK-LABEL: @load_f32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK-LABEL: @load_f32
; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
  %0 = load float addrspace(1)* %in
  store float %0, float addrspace(1)* %out
  ret void
}

; Load a v2f32 value from the global address space.
; R600-CHECK-LABEL: @load_v2f32
; R600-CHECK: VTX_READ_64

; SI-CHECK-LABEL: @load_v2f32
; SI-CHECK: BUFFER_LOAD_DWORDX2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
  %0 = load <2 x float> addrspace(1)* %in
  store <2 x float> %0, <2 x float> addrspace(1)* %out
  ret void
}

; Load an i64 value from the global address space.
; R600-CHECK-LABEL: @load_i64
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT

; SI-CHECK-LABEL: @load_i64
; SI-CHECK: BUFFER_LOAD_DWORDX2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
  %0 = load i64 addrspace(1)* %in
  store i64 %0, i64 addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space and sign-extend it to i64
; (the loaded type is i32; only the stored value is 64 bits wide).
; R600-CHECK-LABEL: @load_i64_sext
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
; R600-CHECK: 31
; SI-CHECK-LABEL: @load_i64_sext
; SI-CHECK: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]:[0-9]\]]]

define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32 addrspace(1)* %in
  %1 = sext i32 %0 to i64
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space and zero-extend it to i64.
; Only the R600 targets have expectations for this function.
; R600-CHECK-LABEL: @load_i64_zext
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32 addrspace(1)* %in
  %1 = zext i32 %0 to i64
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; CONSTANT ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load a sign-extended i8 value
; R600-CHECK-LABEL: @load_const_i8_sext
; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
; SI-CHECK-LABEL: @load_const_i8_sext
; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = load i8 addrspace(2)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an aligned i8 value
; R600-CHECK-LABEL: @load_const_i8_aligned
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK-LABEL: @load_const_i8_aligned
; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = load i8 addrspace(2)* %in
  %1 = zext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an un-aligned i8 value (the address is %in advanced by one i8 element)
; R600-CHECK-LABEL: @load_const_i8_unaligned
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK-LABEL: @load_const_i8_unaligned
; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = getelementptr i8 addrspace(2)* %in, i32 1
  %1 = load i8 addrspace(2)* %0
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load a sign-extended i16 value
; R600-CHECK-LABEL: @load_const_i16_sext
; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
; SI-CHECK-LABEL: @load_const_i16_sext
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = load i16 addrspace(2)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an aligned i16 value
; R600-CHECK-LABEL: @load_const_i16_aligned
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK-LABEL: @load_const_i16_aligned
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = load i16 addrspace(2)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an un-aligned i16 value (the address is %in advanced by one i16 element)
; R600-CHECK-LABEL: @load_const_i16_unaligned
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK-LABEL: @load_const_i16_unaligned
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = getelementptr i16 addrspace(2)* %in, i32 1
  %1 = load i16 addrspace(2)* %0
  %2 = zext i16 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load an i32 value from the constant address space.
; R600-CHECK-LABEL: @load_const_addrspace_i32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK-LABEL: @load_const_addrspace_i32
; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %0 = load i32 addrspace(2)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Load a f32 value from the constant address space.
; R600-CHECK-LABEL: @load_const_addrspace_f32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK-LABEL: @load_const_addrspace_f32
; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
  %1 = load float addrspace(2)* %in
  store float %1, float addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; LOCAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load an i8 value from the local address space.
; R600-CHECK-LABEL: @load_i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_i8_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_U8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
  %1 = load i8 addrspace(3)* %in
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load an i8 value from the local address space and sign-extend it to i32.
; R600-CHECK-LABEL: @load_i8_sext_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: ASHR
; SI-CHECK-LABEL: @load_i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_I8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
  %0 = load i8 addrspace(3)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the local address space and zero-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v2i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_v2i8_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %0 = load <2 x i8> addrspace(3)* %in
  %1 = zext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the local address space and sign-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v2i8_sext_local
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v2i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %0 = load <2 x i8> addrspace(3)* %in
  %1 = sext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the local address space and zero-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v4i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_v4i8_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %0 = load <4 x i8> addrspace(3)* %in
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the local address space and sign-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v4i8_sext_local
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v4i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %0 = load <4 x i8> addrspace(3)* %in
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i16 value from the local address space.
; R600-CHECK-LABEL: @load_i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_i16_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_U16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %0 = load i16 addrspace(3)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an i16 value from the local address space and sign-extend it to i32.
; R600-CHECK-LABEL: @load_i16_sext_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: ASHR
; SI-CHECK-LABEL: @load_i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_I16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %0 = load i16 addrspace(3)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the local address space and zero-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v2i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_v2i16_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %0 = load <2 x i16> addrspace(3)* %in
  %1 = zext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the local address space and sign-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v2i16_sext_local
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v2i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %0 = load <2 x i16> addrspace(3)* %in
  %1 = sext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the local address space and zero-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v4i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_v4i16_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %0 = load <4 x i16> addrspace(3)* %in
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the local address space and sign-extend each
; element to i32.
; R600-CHECK-LABEL: @load_v4i16_sext_local
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v4i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %0 = load <4 x i16> addrspace(3)* %in
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i32 value from the local address space.
; R600-CHECK-LABEL: @load_i32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_i32_local
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: DS_READ_B32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  %0 = load i32 addrspace(3)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Load a f32 value from the local address space.
; R600-CHECK-LABEL: @load_f32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_f32_local
; SI-CHECK: DS_READ_B32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
  %0 = load float addrspace(3)* %in
  store float %0, float addrspace(1)* %out
  ret void
}

; Load a v2f32 value from the local address space.
; R600-CHECK-LABEL: @load_v2f32_local
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_v2f32_local
; SI-CHECK: DS_READ_B32
; SI-CHECK: DS_READ_B32
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
  %0 = load <2 x float> addrspace(3)* %in
  store <2 x float> %0, <2 x float> addrspace(1)* %out
  ret void
}