; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s

; RUN: opt -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s
; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s

; This test exercises the llvm.amdgcn.wavefrontsize intrinsic.
;
; The llc invocations above check the generated code for wave32 and wave64
; configurations. The opt invocations check the optimized IR: when a wavefront
; size feature or a specific CPU is given, the checks expect the intrinsic to be
; replaced by the constant 32 or 64; the two invocations that pass neither
; -mattr nor -mcpu expect the call to remain in the output.

; Simplest case: the intrinsic result is stored directly, so the stored value
; is expected to become the wavefront size itself.

; GCN-LABEL: {{^}}fold_wavefrontsize:
; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize(

; W32: v_mov_b32_e32 [[V:v[0-9]+]], 32
; W64: v_mov_b32_e32 [[V:v[0-9]+]], 64
; GCN: store_dword v{{.+}}, [[V]]

; OPT-W32: store i32 32, i32 addrspace(1)* %arg, align 4
; OPT-W64: store i32 64, i32 addrspace(1)* %arg, align 4
; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
; OPT-WXX: store i32 %tmp, i32 addrspace(1)* %arg, align 4
; OPT-NEXT: ret void

define amdgpu_kernel void @fold_wavefrontsize(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
  store i32 %tmp, i32 addrspace(1)* %arg, align 4
  ret void
}

; The intrinsic result is compared against 32 and feeds a select. With a known
; wavefront size the checks expect the compare and select to disappear, leaving
; a constant store of 1 (wave32) or 2 (wave64) and no cndmask in the generated
; code.

; GCN-LABEL: {{^}}fold_and_optimize_wavefrontsize:
; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize(

; W32: v_mov_b32_e32 [[V:v[0-9]+]], 1{{$}}
; W64: v_mov_b32_e32 [[V:v[0-9]+]], 2{{$}}
; GCN-NOT: cndmask
; GCN: store_dword v{{.+}}, [[V]]

; OPT-W32: store i32 1, i32 addrspace(1)* %arg, align 4
; OPT-W64: store i32 2, i32 addrspace(1)* %arg, align 4
; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
; OPT-WXX: %tmp1 = icmp ugt i32 %tmp, 32
; OPT-WXX: %tmp2 = select i1 %tmp1, i32 2, i32 1
; OPT-WXX: store i32 %tmp2, i32 addrspace(1)* %arg
; OPT-NEXT: ret void

define amdgpu_kernel void @fold_and_optimize_wavefrontsize(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
  %tmp1 = icmp ugt i32 %tmp, 32
  %tmp2 = select i1 %tmp1, i32 2, i32 1
  store i32 %tmp2, i32 addrspace(1)* %arg
  ret void
}

; The intrinsic result controls a branch around a store. For wave64 the checks
; expect the store to remain; for wave32 no prefix-specific line matches between
; the entry label and the final check, so the entry block must be followed
; immediately by the return. With an unknown wavefront size the call and the
; compare are expected to stay.

; GCN-LABEL: {{^}}fold_and_optimize_if_wavefrontsize:
; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(

; OPT: bb:
; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
; OPT-WXX: %tmp1 = icmp ugt i32 %tmp, 32
; OPT-WXX: bb3:
; OPT-W64: store i32 1, i32 addrspace(1)* %arg, align 4
; OPT-NEXT: ret void

define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
  %tmp1 = icmp ugt i32 %tmp, 32
  br i1 %tmp1, label %bb2, label %bb3

bb2:                                              ; preds = %bb
  store i32 1, i32 addrspace(1)* %arg, align 4
  br label %bb3

bb3:                                              ; preds = %bb2, %bb
  ret void
}

; Declaration of the intrinsic under test and the attribute group used at its
; call sites.
declare i32 @llvm.amdgcn.wavefrontsize() #0

attributes #0 = { nounwind readnone speculatable }