; Code generation tests for <2 x half> operations on the NVPTX backend.
; The same IR is compiled in three configurations; checks that are common
; to all of them use the plain prefix, while configuration-specific
; expectations use the F16 / NOF16 prefixes selected below.

; ## Full FP16 support enabled by default.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-F16 %s
; ## FP16 support explicitly disabled.
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
; RUN:          -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
; RUN:          -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
; ## FP16 is not supported by hardware.
; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
; RUN:          -disable-post-ra -frame-pointer=all -verify-machineinstrs \
; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

; Returning a constant vector. 1073757184 is 0x40003C00, i.e. half 2.0
; (0x4000) in the upper 16 bits and half 1.0 (0x3C00) in the lower 16 bits.
; CHECK-LABEL: test_ret_const(
; CHECK:     mov.u32         [[T:%r[0-9]+]], 1073757184;
; CHECK:     mov.b32         [[R:%hh[0-9]+]], [[T]];
; CHECK:     st.param.b32    [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_ret_const() #0 {
  ret <2 x half> <half 1.0, half 2.0>
}

; Extracting element 0 unpacks the pair and keeps the first register.
; CHECK-LABEL: test_extract_0(
; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_extract_0_param_0];
; CHECK:      mov.b32         {[[R:%h[0-9]+]], %tmp_hi}, [[A]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_extract_0(<2 x half> %a) #0 {
  %e = extractelement <2 x half> %a, i32 0
  ret half %e
}

; Extracting element 1 unpacks the pair and keeps the second register.
; CHECK-LABEL: test_extract_1(
; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_extract_1_param_0];
; CHECK:      mov.b32         {%tmp_lo, [[R:%h[0-9]+]]}, [[A]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_extract_1(<2 x half> %a) #0 {
  %e = extractelement <2 x half> %a, i32 1
  ret half %e
}

; Extracting at a run-time index unpacks both halves and selects on idx == 0.
; CHECK-LABEL: test_extract_i(
; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_extract_i_param_0];
; CHECK-DAG:  ld.param.u64    [[IDX:%rd[0-9]+]], [test_extract_i_param_1];
; CHECK-DAG:  setp.eq.s64     [[PRED:%p[0-9]+]], [[IDX]], 0;
; CHECK-DAG:  mov.b32         {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[A]];
; CHECK:      selp.b16        [[R:%h[0-9]+]], [[E0]], [[E1]], [[PRED]];
; CHECK:      st.param.b16    [func_retval0+0], [[R]];
; CHECK:      ret;
define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
  %e = extractelement <2 x half> %a, i64 %idx
  ret half %e
}

; Vector add: a single packed f16x2 instruction when f16 math is available,
; otherwise each lane is promoted to f32, added, and demoted back.
; CHECK-LABEL: test_fadd(
; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fadd_param_0];
; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fadd_param_1];
;
; CHECK-F16-NEXT:   add.rn.f16x2   [[R:%hh[0-9]+]], [[A]], [[B]];
;
; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG:  add.rn.f32     [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
; CHECK-NOF16-DAG:  add.rn.f32     [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16:      mov.b32        [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 {
  %r = fadd <2 x half> %a, %b
  ret <2 x half> %r
}

; Check that we can lower fadd with immediate arguments.
85; CHECK-LABEL: test_fadd_imm_0( 86; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_imm_0_param_0]; 87; 88; CHECK-F16: mov.u32 [[I:%r[0-9+]]], 1073757184; 89; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]]; 90; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[IHH]]; 91; 92; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 93; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 94; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 95; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000; 96; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000; 97; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 98; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 99; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 100; 101; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 102; CHECK-NEXT: ret; 103define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 { 104 %r = fadd <2 x half> <half 1.0, half 2.0>, %a 105 ret <2 x half> %r 106} 107 108; CHECK-LABEL: test_fadd_imm_1( 109; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_imm_1_param_0]; 110; 111; CHECK-F16: mov.u32 [[I:%r[0-9+]]], 1073757184; 112; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]]; 113; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[IHH]]; 114; 115; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 116; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 117; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 118; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000; 119; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000; 120; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 121; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 122; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 123; 124; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 125; CHECK-NEXT: ret; 126define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 { 127 %r = fadd <2 x half> %a, <half 1.0, half 2.0> 128 
ret <2 x half> %r 129} 130 131; CHECK-LABEL: test_fsub( 132; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fsub_param_0]; 133; 134; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fsub_param_1]; 135; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]]; 136; 137; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 138; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 139; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 140; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 141; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 142; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 143; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; 144; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; 145; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 146; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 147; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 148; 149; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 150; CHECK-NEXT: ret; 151define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 { 152 %r = fsub <2 x half> %a, %b 153 ret <2 x half> %r 154} 155 156; CHECK-LABEL: test_fneg( 157; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fneg_param_0]; 158; 159; CHECK-F16: mov.u32 [[I0:%r[0-9+]]], 0; 160; CHECK-F16: mov.b32 [[IHH0:%hh[0-9+]]], [[I0]]; 161; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[IHH0]], [[A]]; 162; 163; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 164; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 165; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 166; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000; 167; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[Z]], [[FA0]]; 168; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[Z]], [[FA1]]; 169; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 170; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 171; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], 
{[[R0]], [[R1]]} 172; 173; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 174; CHECK-NEXT: ret; 175define <2 x half> @test_fneg(<2 x half> %a) #0 { 176 %r = fsub <2 x half> <half 0.0, half 0.0>, %a 177 ret <2 x half> %r 178} 179 180; CHECK-LABEL: test_fmul( 181; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmul_param_0]; 182; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmul_param_1]; 183; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]]; 184; 185; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 186; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 187; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 188; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 189; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 190; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 191; CHECK-NOF16-DAG: mul.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; 192; CHECK-NOF16-DAG: mul.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; 193; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 194; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 195; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 196; 197; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 198; CHECK-NEXT: ret; 199define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 { 200 %r = fmul <2 x half> %a, %b 201 ret <2 x half> %r 202} 203 204; CHECK-LABEL: test_fdiv( 205; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fdiv_param_0]; 206; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fdiv_param_1]; 207; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 208; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 209; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]; 210; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]; 211; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]; 212; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]; 213; CHECK-DAG: div.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; 214; CHECK-DAG: div.rn.f32 [[FR1:%f[0-9]+]], 
[[FA1]], [[FB1]]; 215; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]; 216; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]; 217; CHECK-NEXT: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 218; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 219; CHECK-NEXT: ret; 220define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 { 221 %r = fdiv <2 x half> %a, %b 222 ret <2 x half> %r 223} 224 225; CHECK-LABEL: test_frem( 226; -- Load two 16x2 inputs and split them into f16 elements 227; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_frem_param_0]; 228; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_frem_param_1]; 229; -- Split into elements 230; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 231; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 232; -- promote to f32. 233; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]; 234; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]; 235; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]; 236; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]; 237; -- frem(a[0],b[0]). 238; CHECK-DAG: div.rn.f32 [[FD0:%f[0-9]+]], [[FA0]], [[FB0]]; 239; CHECK-DAG: cvt.rmi.f32.f32 [[DI0:%f[0-9]+]], [[FD0]]; 240; CHECK-DAG: mul.f32 [[RI0:%f[0-9]+]], [[DI0]], [[FB0]]; 241; CHECK-DAG: sub.f32 [[RF0:%f[0-9]+]], [[FA0]], [[RI0]]; 242; -- frem(a[1],b[1]). 243; CHECK-DAG: div.rn.f32 [[FD1:%f[0-9]+]], [[FA1]], [[FB1]]; 244; CHECK-DAG: cvt.rmi.f32.f32 [[DI1:%f[0-9]+]], [[FD1]]; 245; CHECK-DAG: mul.f32 [[RI1:%f[0-9]+]], [[DI1]], [[FB1]]; 246; CHECK-DAG: sub.f32 [[RF1:%f[0-9]+]], [[FA1]], [[RI1]]; 247; -- convert back to f16. 248; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 249; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 250; -- merge into f16x2 and return it. 
251; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 252; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 253; CHECK-NEXT: ret; 254define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 { 255 %r = frem <2 x half> %a, %b 256 ret <2 x half> %r 257} 258 259; CHECK-LABEL: .func test_ldst_v2f16( 260; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0]; 261; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1]; 262; CHECK-DAG: ld.b32 [[E:%hh[0-9]+]], [%[[A]]] 263; CHECK: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]]; 264; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]}; 265; CHECK: ret; 266define void @test_ldst_v2f16(<2 x half>* %a, <2 x half>* %b) { 267 %t1 = load <2 x half>, <2 x half>* %a 268 store <2 x half> %t1, <2 x half>* %b, align 16 269 ret void 270} 271 272; CHECK-LABEL: .func test_ldst_v3f16( 273; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v3f16_param_0]; 274; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v3f16_param_1]; 275; -- v3 is inconvenient to capture as it's lowered as ld.b64 + fair 276; number of bitshifting instructions that may change at llvm's whim. 277; So we only verify that we only issue correct number of writes using 278; correct offset, but not the values we write. 
279; CHECK-DAG: ld.u64 280; CHECK-DAG: st.u32 [%[[B]]], 281; CHECK-DAG: st.b16 [%[[B]]+4], 282; CHECK: ret; 283define void @test_ldst_v3f16(<3 x half>* %a, <3 x half>* %b) { 284 %t1 = load <3 x half>, <3 x half>* %a 285 store <3 x half> %t1, <3 x half>* %b, align 16 286 ret void 287} 288 289; CHECK-LABEL: .func test_ldst_v4f16( 290; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v4f16_param_0]; 291; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v4f16_param_1]; 292; CHECK-DAG: ld.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]]; 293; CHECK-DAG: st.v4.b16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]}; 294; CHECK: ret; 295define void @test_ldst_v4f16(<4 x half>* %a, <4 x half>* %b) { 296 %t1 = load <4 x half>, <4 x half>* %a 297 store <4 x half> %t1, <4 x half>* %b, align 16 298 ret void 299} 300 301; CHECK-LABEL: .func test_ldst_v8f16( 302; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v8f16_param_0]; 303; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v8f16_param_1]; 304; CHECK-DAG: ld.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [%[[A]]]; 305; CHECK-DAG: st.v4.b32 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]}; 306; CHECK: ret; 307define void @test_ldst_v8f16(<8 x half>* %a, <8 x half>* %b) { 308 %t1 = load <8 x half>, <8 x half>* %a 309 store <8 x half> %t1, <8 x half>* %b, align 16 310 ret void 311} 312 313declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0 314 315; CHECK-LABEL: test_call( 316; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_param_0]; 317; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_param_1]; 318; CHECK: { 319; CHECK-DAG: .param .align 4 .b8 param0[4]; 320; CHECK-DAG: .param .align 4 .b8 param1[4]; 321; CHECK-DAG: st.param.b32 [param0+0], [[A]]; 322; CHECK-DAG: st.param.b32 [param1+0], [[B]]; 323; CHECK-DAG: .param .align 4 .b8 retval0[4]; 324; CHECK: call.uni (retval0), 325; CHECK-NEXT: test_callee, 326; CHECK: ); 327; CHECK-NEXT: 
ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; 328; CHECK-NEXT: } 329; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 330; CHECK-NEXT: ret; 331define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 { 332 %r = call <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) 333 ret <2 x half> %r 334} 335 336; CHECK-LABEL: test_call_flipped( 337; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_flipped_param_0]; 338; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_flipped_param_1]; 339; CHECK: { 340; CHECK-DAG: .param .align 4 .b8 param0[4]; 341; CHECK-DAG: .param .align 4 .b8 param1[4]; 342; CHECK-DAG: st.param.b32 [param0+0], [[B]]; 343; CHECK-DAG: st.param.b32 [param1+0], [[A]]; 344; CHECK-DAG: .param .align 4 .b8 retval0[4]; 345; CHECK: call.uni (retval0), 346; CHECK-NEXT: test_callee, 347; CHECK: ); 348; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; 349; CHECK-NEXT: } 350; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 351; CHECK-NEXT: ret; 352define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 { 353 %r = call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a) 354 ret <2 x half> %r 355} 356 357; CHECK-LABEL: test_tailcall_flipped( 358; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_tailcall_flipped_param_0]; 359; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_tailcall_flipped_param_1]; 360; CHECK: { 361; CHECK-DAG: .param .align 4 .b8 param0[4]; 362; CHECK-DAG: .param .align 4 .b8 param1[4]; 363; CHECK-DAG: st.param.b32 [param0+0], [[B]]; 364; CHECK-DAG: st.param.b32 [param1+0], [[A]]; 365; CHECK-DAG: .param .align 4 .b8 retval0[4]; 366; CHECK: call.uni (retval0), 367; CHECK-NEXT: test_callee, 368; CHECK: ); 369; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; 370; CHECK-NEXT: } 371; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 372; CHECK-NEXT: ret; 373define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 { 374 %r = tail call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a) 375 ret <2 x 
half> %r 376} 377 378; CHECK-LABEL: test_select( 379; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_param_0]; 380; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_param_1]; 381; CHECK-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2] 382; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1; 383; CHECK-NEXT: selp.b32 [[R:%hh[0-9]+]], [[A]], [[B]], [[PRED]]; 384; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 385; CHECK-NEXT: ret; 386define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 { 387 %r = select i1 %c, <2 x half> %a, <2 x half> %b 388 ret <2 x half> %r 389} 390 391; CHECK-LABEL: test_select_cc( 392; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_param_0]; 393; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_param_1]; 394; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_param_2]; 395; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_param_3]; 396; 397; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]] 398; 399; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] 400; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]] 401; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]]; 402; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]]; 403; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]]; 404; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]]; 405; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]] 406; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]] 407; 408; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 409; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 410; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]]; 411; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]]; 412; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 413; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 414; CHECK-NEXT: ret; 415define <2 x half> @test_select_cc(<2 x 
half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 { 416 %cc = fcmp une <2 x half> %c, %d 417 %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b 418 ret <2 x half> %r 419} 420 421; CHECK-LABEL: test_select_cc_f32_f16( 422; CHECK-DAG: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_0]; 423; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_1]; 424; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_f32_f16_param_2]; 425; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_f32_f16_param_3]; 426; 427; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]] 428; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] 429; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]] 430; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]]; 431; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]]; 432; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]]; 433; CHECK-NOF16-DAG: cvt.f32.f16 [[CF1:%f[0-9]+]], [[C1]]; 434; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]] 435; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]] 436; 437; CHECK-DAG: selp.f32 [[R0:%f[0-9]+]], [[A0]], [[B0]], [[P0]]; 438; CHECK-DAG: selp.f32 [[R1:%f[0-9]+]], [[A1]], [[B1]], [[P1]]; 439; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]}; 440; CHECK-NEXT: ret; 441define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b, 442 <2 x half> %c, <2 x half> %d) #0 { 443 %cc = fcmp une <2 x half> %c, %d 444 %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b 445 ret <2 x float> %r 446} 447 448; CHECK-LABEL: test_select_cc_f16_f32( 449; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_f16_f32_param_0]; 450; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_f16_f32_param_1]; 451; CHECK-DAG: ld.param.v2.f32 {[[C0:%f[0-9]+]], [[C1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_2]; 452; CHECK-DAG: 
ld.param.v2.f32 {[[D0:%f[0-9]+]], [[D1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_3]; 453; CHECK-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[C0]], [[D0]] 454; CHECK-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[C1]], [[D1]] 455; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 456; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 457; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]]; 458; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]]; 459; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 460; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; 461; CHECK-NEXT: ret; 462define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b, 463 <2 x float> %c, <2 x float> %d) #0 { 464 %cc = fcmp une <2 x float> %c, %d 465 %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b 466 ret <2 x half> %r 467} 468 469; CHECK-LABEL: test_fcmp_une( 470; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_une_param_0]; 471; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_une_param_1]; 472; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 473; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 474; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 475; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 476; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 477; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 478; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 479; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 480; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 481; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 482; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 483; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 484; CHECK-NEXT: ret; 485define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 { 486 %r = fcmp une <2 x half> %a, %b 487 ret <2 x i1> %r 488} 489 490; CHECK-LABEL: test_fcmp_ueq( 491; 
CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ueq_param_0]; 492; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ueq_param_1]; 493; CHECK-F16: setp.equ.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 494; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 495; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 496; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 497; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 498; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 499; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 500; CHECK-NOF16-DAG: setp.equ.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 501; CHECK-NOF16-DAG: setp.equ.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 502; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 503; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 504; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 505; CHECK-NEXT: ret; 506define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 { 507 %r = fcmp ueq <2 x half> %a, %b 508 ret <2 x i1> %r 509} 510 511; CHECK-LABEL: test_fcmp_ugt( 512; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ugt_param_0]; 513; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ugt_param_1]; 514; CHECK-F16: setp.gtu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 515; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 516; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 517; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 518; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 519; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 520; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 521; CHECK-NOF16-DAG: setp.gtu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 522; CHECK-NOF16-DAG: setp.gtu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 523; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 524; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 525; CHECK-NEXT: st.param.v2.b8 
[func_retval0+0], {[[R0]], [[R1]]}; 526; CHECK-NEXT: ret; 527define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 { 528 %r = fcmp ugt <2 x half> %a, %b 529 ret <2 x i1> %r 530} 531 532; CHECK-LABEL: test_fcmp_uge( 533; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uge_param_0]; 534; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uge_param_1]; 535; CHECK-F16: setp.geu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 536; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 537; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 538; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 539; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 540; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 541; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 542; CHECK-NOF16-DAG: setp.geu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 543; CHECK-NOF16-DAG: setp.geu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 544; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 545; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 546; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 547; CHECK-NEXT: ret; 548define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 { 549 %r = fcmp uge <2 x half> %a, %b 550 ret <2 x i1> %r 551} 552 553; CHECK-LABEL: test_fcmp_ult( 554; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ult_param_0]; 555; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ult_param_1]; 556; CHECK-F16: setp.ltu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 557; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 558; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 559; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 560; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 561; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 562; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 563; CHECK-NOF16-DAG: setp.ltu.f32 [[P0:%p[0-9]+]], [[FA0]], 
[[FB0]] 564; CHECK-NOF16-DAG: setp.ltu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 565; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 566; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 567; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 568; CHECK-NEXT: ret; 569define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 { 570 %r = fcmp ult <2 x half> %a, %b 571 ret <2 x i1> %r 572} 573 574; CHECK-LABEL: test_fcmp_ule( 575; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ule_param_0]; 576; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ule_param_1]; 577; CHECK-F16: setp.leu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 578; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 579; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 580; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 581; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 582; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 583; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 584; CHECK-NOF16-DAG: setp.leu.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 585; CHECK-NOF16-DAG: setp.leu.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 586; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 587; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 588; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 589; CHECK-NEXT: ret; 590define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 { 591 %r = fcmp ule <2 x half> %a, %b 592 ret <2 x i1> %r 593} 594 595 596; CHECK-LABEL: test_fcmp_uno( 597; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uno_param_0]; 598; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uno_param_1]; 599; CHECK-F16: setp.nan.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 600; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 601; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 602; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 603; 
CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 604; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 605; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 606; CHECK-NOF16-DAG: setp.nan.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 607; CHECK-NOF16-DAG: setp.nan.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 608; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 609; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 610; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 611; CHECK-NEXT: ret; 612define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 { 613 %r = fcmp uno <2 x half> %a, %b 614 ret <2 x i1> %r 615} 616 617; CHECK-LABEL: test_fcmp_one( 618; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_one_param_0]; 619; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_one_param_1]; 620; CHECK-F16: setp.ne.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 621; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 622; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 623; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 624; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 625; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 626; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 627; CHECK-NOF16-DAG: setp.ne.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 628; CHECK-NOF16-DAG: setp.ne.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 629; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 630; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 631; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 632; CHECK-NEXT: ret; 633define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 { 634 %r = fcmp one <2 x half> %a, %b 635 ret <2 x i1> %r 636} 637 638; CHECK-LABEL: test_fcmp_oeq( 639; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oeq_param_0]; 640; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oeq_param_1]; 641; CHECK-F16: setp.eq.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], 
[[A]], [[B]] 642; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 643; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 644; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 645; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 646; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 647; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 648; CHECK-NOF16-DAG: setp.eq.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 649; CHECK-NOF16-DAG: setp.eq.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 650; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 651; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 652; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 653; CHECK-NEXT: ret; 654define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 { 655 %r = fcmp oeq <2 x half> %a, %b 656 ret <2 x i1> %r 657} 658 659; CHECK-LABEL: test_fcmp_ogt( 660; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ogt_param_0]; 661; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ogt_param_1]; 662; CHECK-F16: setp.gt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 663; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 664; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 665; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 666; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 667; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 668; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 669; CHECK-NOF16-DAG: setp.gt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 670; CHECK-NOF16-DAG: setp.gt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 671; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 672; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 673; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 674; CHECK-NEXT: ret; 675define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 { 676 %r = fcmp ogt <2 x half> %a, %b 677 ret <2 x i1> %r 678} 679 680; CHECK-LABEL: 
test_fcmp_oge( 681; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oge_param_0]; 682; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oge_param_1]; 683; CHECK-F16: setp.ge.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 684; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 685; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 686; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 687; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 688; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 689; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 690; CHECK-NOF16-DAG: setp.ge.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 691; CHECK-NOF16-DAG: setp.ge.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 692; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 693; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 694; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 695; CHECK-NEXT: ret; 696define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 { 697 %r = fcmp oge <2 x half> %a, %b 698 ret <2 x i1> %r 699} 700 701; CHECK-LABEL: test_fcmp_olt( 702; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_olt_param_0]; 703; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_olt_param_1]; 704; CHECK-F16: setp.lt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] 705; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 706; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 707; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] 708; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 709; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] 710; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 711; CHECK-NOF16-DAG: setp.lt.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]] 712; CHECK-NOF16-DAG: setp.lt.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 713; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 714; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 715; CHECK-NEXT: 
st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK-NEXT: ret;
define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 {
  %r = fcmp olt <2 x half> %a, %b
  ret <2 x i1> %r
}

; Ordered less-than-or-equal compare of two <2 x half> vectors.
; With native f16 math a single setp.le.f16x2 produces both lane predicates;
; without it each lane is unpacked, extended to f32 and compared with
; setp.le.f32. In both configurations the predicates are turned into 16-bit
; values with selp.u16 and stored as a two-element b8 vector.
; CHECK-LABEL: test_fcmp_ole(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ole_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ole_param_1];
; CHECK-F16: setp.le.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: setp.le.f32 [[P0:%p[0-9]+]], [[FA0]], [[FB0]]
; CHECK-NOF16-DAG: setp.le.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]]
; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]];
; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]];
; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK-NEXT: ret;
define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 {
  %r = fcmp ole <2 x half> %a, %b
  ret <2 x i1> %r
}

; CHECK-LABEL: test_fcmp_ord(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ord_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ord_param_1];
; CHECK-F16: setp.num.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: setp.num.f32 [[P0:%p[0-9]+]],
[[FA0]], [[FB0]] 754; CHECK-NOF16-DAG: setp.num.f32 [[P1:%p[0-9]+]], [[FA1]], [[FB1]] 755; CHECK-DAG: selp.u16 [[R0:%rs[0-9]+]], -1, 0, [[P0]]; 756; CHECK-DAG: selp.u16 [[R1:%rs[0-9]+]], -1, 0, [[P1]]; 757; CHECK-NEXT: st.param.v2.b8 [func_retval0+0], {[[R0]], [[R1]]}; 758; CHECK-NEXT: ret; 759define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 { 760 %r = fcmp ord <2 x half> %a, %b 761 ret <2 x i1> %r 762} 763 764; CHECK-LABEL: test_fptosi_i32( 765; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i32_param_0]; 766; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 767; CHECK-DAG: cvt.rzi.s32.f16 [[R0:%r[0-9]+]], [[A0]]; 768; CHECK-DAG: cvt.rzi.s32.f16 [[R1:%r[0-9]+]], [[A1]]; 769; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]} 770; CHECK: ret; 771define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 { 772 %r = fptosi <2 x half> %a to <2 x i32> 773 ret <2 x i32> %r 774} 775 776; CHECK-LABEL: test_fptosi_i64( 777; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i64_param_0]; 778; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 779; CHECK-DAG: cvt.rzi.s64.f16 [[R0:%rd[0-9]+]], [[A0]]; 780; CHECK-DAG: cvt.rzi.s64.f16 [[R1:%rd[0-9]+]], [[A1]]; 781; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]} 782; CHECK: ret; 783define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 { 784 %r = fptosi <2 x half> %a to <2 x i64> 785 ret <2 x i64> %r 786} 787 788; CHECK-LABEL: test_fptoui_2xi32( 789; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi32_param_0]; 790; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 791; CHECK-DAG: cvt.rzi.u32.f16 [[R0:%r[0-9]+]], [[A0]]; 792; CHECK-DAG: cvt.rzi.u32.f16 [[R1:%r[0-9]+]], [[A1]]; 793; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]} 794; CHECK: ret; 795define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 { 796 %r = fptoui <2 x half> %a to <2 x i32> 797 ret <2 x i32> %r 798} 799 800; CHECK-LABEL: test_fptoui_2xi64( 801; CHECK: ld.param.b32 [[A:%hh[0-9]+]], 
[test_fptoui_2xi64_param_0]; 802; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 803; CHECK-DAG: cvt.rzi.u64.f16 [[R0:%rd[0-9]+]], [[A0]]; 804; CHECK-DAG: cvt.rzi.u64.f16 [[R1:%rd[0-9]+]], [[A1]]; 805; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]} 806; CHECK: ret; 807define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 { 808 %r = fptoui <2 x half> %a to <2 x i64> 809 ret <2 x i64> %r 810} 811 812; CHECK-LABEL: test_uitofp_2xi32( 813; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_param_0]; 814; CHECK-DAG: cvt.rn.f16.u32 [[R0:%h[0-9]+]], [[A0]]; 815; CHECK-DAG: cvt.rn.f16.u32 [[R1:%h[0-9]+]], [[A1]]; 816; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 817; CHECK: st.param.b32 [func_retval0+0], [[R]]; 818; CHECK: ret; 819define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 { 820 %r = uitofp <2 x i32> %a to <2 x half> 821 ret <2 x half> %r 822} 823 824; CHECK-LABEL: test_uitofp_2xi64( 825; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_uitofp_2xi64_param_0]; 826; CHECK-DAG: cvt.rn.f16.u64 [[R0:%h[0-9]+]], [[A0]]; 827; CHECK-DAG: cvt.rn.f16.u64 [[R1:%h[0-9]+]], [[A1]]; 828; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 829; CHECK: st.param.b32 [func_retval0+0], [[R]]; 830; CHECK: ret; 831define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 { 832 %r = uitofp <2 x i64> %a to <2 x half> 833 ret <2 x half> %r 834} 835 836; CHECK-LABEL: test_sitofp_2xi32( 837; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_param_0]; 838; CHECK-DAG: cvt.rn.f16.s32 [[R0:%h[0-9]+]], [[A0]]; 839; CHECK-DAG: cvt.rn.f16.s32 [[R1:%h[0-9]+]], [[A1]]; 840; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 841; CHECK: st.param.b32 [func_retval0+0], [[R]]; 842; CHECK: ret; 843define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 { 844 %r = sitofp <2 x i32> %a to <2 x half> 845 ret <2 x half> %r 846} 847 848; CHECK-LABEL: test_sitofp_2xi64( 849; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], 
[[A1:%rd[0-9]+]]}, [test_sitofp_2xi64_param_0]; 850; CHECK-DAG: cvt.rn.f16.s64 [[R0:%h[0-9]+]], [[A0]]; 851; CHECK-DAG: cvt.rn.f16.s64 [[R1:%h[0-9]+]], [[A1]]; 852; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 853; CHECK: st.param.b32 [func_retval0+0], [[R]]; 854; CHECK: ret; 855define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 { 856 %r = sitofp <2 x i64> %a to <2 x half> 857 ret <2 x half> %r 858} 859 860; CHECK-LABEL: test_uitofp_2xi32_fadd( 861; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_fadd_param_0]; 862; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_uitofp_2xi32_fadd_param_1]; 863; CHECK-DAG: cvt.rn.f16.u32 [[C0:%h[0-9]+]], [[A0]]; 864; CHECK-DAG: cvt.rn.f16.u32 [[C1:%h[0-9]+]], [[A1]]; 865 866; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]} 867; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]]; 868; 869; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 870; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 871; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 872; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] 873; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]] 874; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]]; 875; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]]; 876; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 877; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 878; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 879; 880; CHECK: st.param.b32 [func_retval0+0], [[R]]; 881; CHECK: ret; 882define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { 883 %c = uitofp <2 x i32> %a to <2 x half> 884 %r = fadd <2 x half> %b, %c 885 ret <2 x half> %r 886} 887 888; CHECK-LABEL: test_sitofp_2xi32_fadd( 889; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_fadd_param_0]; 890; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_sitofp_2xi32_fadd_param_1]; 
891; CHECK-DAG: cvt.rn.f16.s32 [[C0:%h[0-9]+]], [[A0]]; 892; CHECK-DAG: cvt.rn.f16.s32 [[C1:%h[0-9]+]], [[A1]]; 893; 894; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]} 895; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]]; 896; 897; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 898; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] 899; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] 900; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] 901; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]] 902; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]]; 903; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]]; 904; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] 905; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] 906; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 907; 908; CHECK: st.param.b32 [func_retval0+0], [[R]]; 909; CHECK: ret; 910define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { 911 %c = sitofp <2 x i32> %a to <2 x half> 912 %r = fadd <2 x half> %b, %c 913 ret <2 x half> %r 914} 915 916; CHECK-LABEL: test_fptrunc_2xfloat( 917; CHECK: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_fptrunc_2xfloat_param_0]; 918; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[A0]]; 919; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[A1]]; 920; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 921; CHECK: st.param.b32 [func_retval0+0], [[R]]; 922; CHECK: ret; 923define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 { 924 %r = fptrunc <2 x float> %a to <2 x half> 925 ret <2 x half> %r 926} 927 928; CHECK-LABEL: test_fptrunc_2xdouble( 929; CHECK: ld.param.v2.f64 {[[A0:%fd[0-9]+]], [[A1:%fd[0-9]+]]}, [test_fptrunc_2xdouble_param_0]; 930; CHECK-DAG: cvt.rn.f16.f64 [[R0:%h[0-9]+]], [[A0]]; 931; CHECK-DAG: cvt.rn.f16.f64 [[R1:%h[0-9]+]], [[A1]]; 932; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 933; CHECK: st.param.b32 [func_retval0+0], 
[[R]]; 934; CHECK: ret; 935define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 { 936 %r = fptrunc <2 x double> %a to <2 x half> 937 ret <2 x half> %r 938} 939 940; CHECK-LABEL: test_fpext_2xfloat( 941; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xfloat_param_0]; 942; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 943; CHECK-DAG: cvt.f32.f16 [[R0:%f[0-9]+]], [[A0]]; 944; CHECK-DAG: cvt.f32.f16 [[R1:%f[0-9]+]], [[A1]]; 945; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]}; 946; CHECK: ret; 947define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 { 948 %r = fpext <2 x half> %a to <2 x float> 949 ret <2 x float> %r 950} 951 952; CHECK-LABEL: test_fpext_2xdouble( 953; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xdouble_param_0]; 954; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 955; CHECK-DAG: cvt.f64.f16 [[R0:%fd[0-9]+]], [[A0]]; 956; CHECK-DAG: cvt.f64.f16 [[R1:%fd[0-9]+]], [[A1]]; 957; CHECK-NEXT: st.param.v2.f64 [func_retval0+0], {[[R0]], [[R1]]}; 958; CHECK: ret; 959define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 { 960 %r = fpext <2 x half> %a to <2 x double> 961 ret <2 x double> %r 962} 963 964 965; CHECK-LABEL: test_bitcast_2xhalf_to_2xi16( 966; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_bitcast_2xhalf_to_2xi16_param_0]; 967; CHECK-DAG: cvt.u16.u32 [[R0:%rs[0-9]+]], [[A]] 968; CHECK-DAG: shr.u32 [[AH:%r[0-9]+]], [[A]], 16 969; CHECK-DAG: cvt.u16.u32 [[R1:%rs[0-9]+]], [[AH]] 970; CHECK: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]} 971; CHECK: ret; 972define <2 x i16> @test_bitcast_2xhalf_to_2xi16(<2 x half> %a) #0 { 973 %r = bitcast <2 x half> %a to <2 x i16> 974 ret <2 x i16> %r 975} 976 977; CHECK-LABEL: test_bitcast_2xi16_to_2xhalf( 978; CHECK: ld.param.v2.u16 {[[RS0:%rs[0-9]+]], [[RS1:%rs[0-9]+]]}, [test_bitcast_2xi16_to_2xhalf_param_0]; 979; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RS0]]; 980; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RS1]]; 981; CHECK-DAG: shl.b32 [[R1H:%r[0-9]+]], 
[[R1]], 16; 982; CHECK-DAG: or.b32 [[R1H0L:%r[0-9]+]], [[R0]], [[R1H]]; 983; CHECK: mov.b32 [[R:%hh[0-9]+]], [[R1H0L]]; 984; CHECK: st.param.b32 [func_retval0+0], [[R]]; 985; CHECK: ret; 986define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 { 987 %r = bitcast <2 x i16> %a to <2 x half> 988 ret <2 x half> %r 989} 990 991 992declare <2 x half> @llvm.sqrt.f16(<2 x half> %a) #0 993declare <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b) #0 994declare <2 x half> @llvm.sin.f16(<2 x half> %a) #0 995declare <2 x half> @llvm.cos.f16(<2 x half> %a) #0 996declare <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b) #0 997declare <2 x half> @llvm.exp.f16(<2 x half> %a) #0 998declare <2 x half> @llvm.exp2.f16(<2 x half> %a) #0 999declare <2 x half> @llvm.log.f16(<2 x half> %a) #0 1000declare <2 x half> @llvm.log10.f16(<2 x half> %a) #0 1001declare <2 x half> @llvm.log2.f16(<2 x half> %a) #0 1002declare <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 1003declare <2 x half> @llvm.fabs.f16(<2 x half> %a) #0 1004declare <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) #0 1005declare <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) #0 1006declare <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) #0 1007declare <2 x half> @llvm.floor.f16(<2 x half> %a) #0 1008declare <2 x half> @llvm.ceil.f16(<2 x half> %a) #0 1009declare <2 x half> @llvm.trunc.f16(<2 x half> %a) #0 1010declare <2 x half> @llvm.rint.f16(<2 x half> %a) #0 1011declare <2 x half> @llvm.nearbyint.f16(<2 x half> %a) #0 1012declare <2 x half> @llvm.round.f16(<2 x half> %a) #0 1013declare <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 1014 1015; CHECK-LABEL: test_sqrt( 1016; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sqrt_param_0]; 1017; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1018; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; 1019; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; 1020; CHECK-DAG: 
sqrt.rn.f32 [[RF0:%f[0-9]+]], [[AF0]]; 1021; CHECK-DAG: sqrt.rn.f32 [[RF1:%f[0-9]+]], [[AF1]]; 1022; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 1023; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 1024; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1025; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1026; CHECK: ret; 1027define <2 x half> @test_sqrt(<2 x half> %a) #0 { 1028 %r = call <2 x half> @llvm.sqrt.f16(<2 x half> %a) 1029 ret <2 x half> %r 1030} 1031 1032;;; Can't do this yet: requires libcall. 1033; XCHECK-LABEL: test_powi( 1034;define <2 x half> @test_powi(<2 x half> %a, <2 x i32> %b) #0 { 1035; %r = call <2 x half> @llvm.powi.f16(<2 x half> %a, <2 x i32> %b) 1036; ret <2 x half> %r 1037;} 1038 1039; CHECK-LABEL: test_sin( 1040; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sin_param_0]; 1041; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1042; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; 1043; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; 1044; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]]; 1045; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]]; 1046; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 1047; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 1048; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1049; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1050; CHECK: ret; 1051define <2 x half> @test_sin(<2 x half> %a) #0 #1 { 1052 %r = call <2 x half> @llvm.sin.f16(<2 x half> %a) 1053 ret <2 x half> %r 1054} 1055 1056; CHECK-LABEL: test_cos( 1057; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_cos_param_0]; 1058; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1059; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; 1060; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; 1061; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]]; 1062; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]]; 1063; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 1064; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 
1065; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1066; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1067; CHECK: ret; 1068define <2 x half> @test_cos(<2 x half> %a) #0 #1 { 1069 %r = call <2 x half> @llvm.cos.f16(<2 x half> %a) 1070 ret <2 x half> %r 1071} 1072 1073;;; Can't do this yet: requires libcall. 1074; XCHECK-LABEL: test_pow( 1075;define <2 x half> @test_pow(<2 x half> %a, <2 x half> %b) #0 { 1076; %r = call <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b) 1077; ret <2 x half> %r 1078;} 1079 1080;;; Can't do this yet: requires libcall. 1081; XCHECK-LABEL: test_exp( 1082;define <2 x half> @test_exp(<2 x half> %a) #0 { 1083; %r = call <2 x half> @llvm.exp.f16(<2 x half> %a) 1084; ret <2 x half> %r 1085;} 1086 1087;;; Can't do this yet: requires libcall. 1088; XCHECK-LABEL: test_exp2( 1089;define <2 x half> @test_exp2(<2 x half> %a) #0 { 1090; %r = call <2 x half> @llvm.exp2.f16(<2 x half> %a) 1091; ret <2 x half> %r 1092;} 1093 1094;;; Can't do this yet: requires libcall. 1095; XCHECK-LABEL: test_log( 1096;define <2 x half> @test_log(<2 x half> %a) #0 { 1097; %r = call <2 x half> @llvm.log.f16(<2 x half> %a) 1098; ret <2 x half> %r 1099;} 1100 1101;;; Can't do this yet: requires libcall. 1102; XCHECK-LABEL: test_log10( 1103;define <2 x half> @test_log10(<2 x half> %a) #0 { 1104; %r = call <2 x half> @llvm.log10.f16(<2 x half> %a) 1105; ret <2 x half> %r 1106;} 1107 1108;;; Can't do this yet: requires libcall. 
; XCHECK-LABEL: test_log2(
;define <2 x half> @test_log2(<2 x half> %a) #0 {
;  %r = call <2 x half> @llvm.log2.f16(<2 x half> %a)
;  ret <2 x half> %r
;}

; Fused multiply-add on three <2 x half> vectors.
; With native f16 math this is one fma.rn.f16x2 on the packed operands.
; Without it, all three operands are unpacked, each of the six halves is
; extended to f32, two fma.rn.f32 are issued (one per lane), and the results
; are rounded back to f16 and repacked.
; CHECK-LABEL: test_fma(
; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fma_param_0];
; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fma_param_1];
; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fma_param_2];
;
; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
;
; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
; Lane 0 operands.
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]]
; Lane 1 operands; FC1 must be captured here because the second fma below
; consumes it.
; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]]
; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]}

; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
  %r = call <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
  ret <2 x half> %r
}

; CHECK-LABEL: test_fabs(
; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fabs_param_0];
; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]],
[[AF0]]; 1150; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]]; 1151; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 1152; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 1153; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1154; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1155; CHECK: ret; 1156define <2 x half> @test_fabs(<2 x half> %a) #0 { 1157 %r = call <2 x half> @llvm.fabs.f16(<2 x half> %a) 1158 ret <2 x half> %r 1159} 1160 1161; CHECK-LABEL: test_minnum( 1162; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_minnum_param_0]; 1163; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_minnum_param_1]; 1164; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1165; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 1166; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; 1167; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; 1168; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]]; 1169; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]]; 1170; CHECK-DAG: min.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]]; 1171; CHECK-DAG: min.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]]; 1172; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 1173; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 1174; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1175; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1176; CHECK: ret; 1177define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 { 1178 %r = call <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) 1179 ret <2 x half> %r 1180} 1181 1182; CHECK-LABEL: test_maxnum( 1183; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_maxnum_param_0]; 1184; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_maxnum_param_1]; 1185; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1186; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 1187; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; 1188; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; 1189; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]]; 1190; CHECK-DAG: 
cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]]; 1191; CHECK-DAG: max.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]]; 1192; CHECK-DAG: max.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]]; 1193; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; 1194; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; 1195; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1196; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1197; CHECK: ret; 1198define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 { 1199 %r = call <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) 1200 ret <2 x half> %r 1201} 1202 1203; CHECK-LABEL: test_copysign( 1204; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_param_0]; 1205; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_param_1]; 1206; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1207; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 1208; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; 1209; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; 1210; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]]; 1211; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]]; 1212; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767; 1213; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767; 1214; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768; 1215; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768; 1216; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]]; 1217; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]]; 1218; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; 1219; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; 1220; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1221; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1222; CHECK: ret; 1223define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 { 1224 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) 1225 ret <2 x half> %r 1226} 1227 1228; CHECK-LABEL: test_copysign_f32( 1229; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f32_param_0]; 1230; 
CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1]; 1231; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1232; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; 1233; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; 1234; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]]; 1235; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]]; 1236; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767; 1237; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767; 1238; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648; 1239; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648; 1240; CHECK-DAG: shr.u32 [[BY0:%r[0-9]+]], [[BX0]], 16; 1241; CHECK-DAG: shr.u32 [[BY1:%r[0-9]+]], [[BX1]], 16; 1242; CHECK-DAG: cvt.u16.u32 [[BZ0:%rs[0-9]+]], [[BY0]]; 1243; CHECK-DAG: cvt.u16.u32 [[BZ1:%rs[0-9]+]], [[BY1]]; 1244; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]]; 1245; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]]; 1246; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; 1247; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; 1248; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1249; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1250; CHECK: ret; 1251define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 { 1252 %tb = fptrunc <2 x float> %b to <2 x half> 1253 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb) 1254 ret <2 x half> %r 1255} 1256 1257; CHECK-LABEL: test_copysign_f64( 1258; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f64_param_0]; 1259; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1]; 1260; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1261; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; 1262; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; 1263; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]]; 1264; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]]; 1265; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767; 1266; CHECK-DAG: 
and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767; 1267; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808; 1268; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808; 1269; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48; 1270; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48; 1271; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]]; 1272; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]]; 1273; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]]; 1274; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]]; 1275; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; 1276; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; 1277; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1278; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1279; CHECK: ret; 1280define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { 1281 %tb = fptrunc <2 x double> %b to <2 x half> 1282 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb) 1283 ret <2 x half> %r 1284} 1285 1286; CHECK-LABEL: test_copysign_extended( 1287; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_extended_param_0]; 1288; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_extended_param_1]; 1289; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] 1290; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] 1291; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; 1292; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; 1293; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]]; 1294; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]]; 1295; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767; 1296; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767; 1297; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768; 1298; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768; 1299; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]]; 1300; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]]; 1301; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; 1302; CHECK-DAG: mov.b16 
[[R1:%h[0-9]+]], [[RS1]]; 1303; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1304; CHECK: mov.b32 {[[RX0:%h[0-9]+]], [[RX1:%h[0-9]+]]}, [[R]] 1305; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]]; 1306; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]]; 1307; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]}; 1308; CHECK: ret; 1309define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 { 1310 %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) 1311 %xr = fpext <2 x half> %r to <2 x float> 1312 ret <2 x float> %xr 1313} 1314 1315; CHECK-LABEL: test_floor( 1316; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_floor_param_0]; 1317; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; 1318; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; 1319; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; 1320; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1321; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1322; CHECK: ret; 1323define <2 x half> @test_floor(<2 x half> %a) #0 { 1324 %r = call <2 x half> @llvm.floor.f16(<2 x half> %a) 1325 ret <2 x half> %r 1326} 1327 1328; CHECK-LABEL: test_ceil( 1329; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_ceil_param_0]; 1330; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; 1331; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; 1332; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; 1333; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} 1334; CHECK: st.param.b32 [func_retval0+0], [[R]]; 1335; CHECK: ret; 1336define <2 x half> @test_ceil(<2 x half> %a) #0 { 1337 %r = call <2 x half> @llvm.ceil.f16(<2 x half> %a) 1338 ret <2 x half> %r 1339} 1340 1341; CHECK-LABEL: test_trunc( 1342; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_trunc_param_0]; 1343; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; 1344; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; 1345; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; 1346; CHECK: mov.b32 [[R:%hh[0-9]+]], 
{[[R0]], [[R1]]}
; CHECK:      st.param.b32    [func_retval0+0], [[R]];
; CHECK:      ret;
define <2 x half> @test_trunc(<2 x half> %a) #0 {
  %r = call <2 x half> @llvm.trunc.f16(<2 x half> %a)
  ret <2 x half> %r
}

; rint on <2 x half>: the packed argument is expected to be split into two
; halves, each converted with cvt.rni.f16.f16, and the results repacked into a
; single 32-bit register before being stored as the return value.
; CHECK-LABEL: test_rint(
; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_rint_param_0];
; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
; CHECK-DAG:  cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
; CHECK-DAG:  cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK:      st.param.b32    [func_retval0+0], [[R]];
; CHECK:      ret;
define <2 x half> @test_rint(<2 x half> %a) #0 {
  %r = call <2 x half> @llvm.rint.f16(<2 x half> %a)
  ret <2 x half> %r
}

; nearbyint on <2 x half>: same expected shape as rint above (unpack, one
; cvt.rni.f16.f16 per lane, repack).
; CHECK-LABEL: test_nearbyint(
; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_nearbyint_param_0];
; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
; CHECK-DAG:  cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
; CHECK-DAG:  cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK:      st.param.b32    [func_retval0+0], [[R]];
; CHECK:      ret;
define <2 x half> @test_nearbyint(<2 x half> %a) #0 {
  %r = call <2 x half> @llvm.nearbyint.f16(<2 x half> %a)
  ret <2 x half> %r
}

; round on <2 x half>: currently expected to produce the same per-lane
; cvt.rni.f16.f16 sequence as rint/nearbyint.
; NOTE(review): llvm.round rounds halfway cases away from zero, whereas the PTX
; .rni modifier rounds them to even. The expectation below is kept exactly as
; it was; confirm that this lowering is intended.
; CHECK-LABEL: test_round(
; CHECK:      ld.param.b32    [[A:%hh[0-9]+]], [test_round_param_0];
; CHECK-DAG:  mov.b32         {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]];
; CHECK-DAG:  cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]];
; CHECK-DAG:  cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]];
; CHECK:      mov.b32         [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
; CHECK:      st.param.b32    [func_retval0+0], [[R]];
; CHECK:      ret;
define <2 x half> @test_round(<2 x half> %a) #0 {
  %r = call <2 x half> @llvm.round.f16(<2 x half> %a)
  ret <2 x half> %r
}

; fmuladd on <2 x half>.
; With native f16 math a single packed fma.rn.f16x2 is expected. Without it,
; all three operands are unpacked, every half is extended to f32, one
; fma.rn.f32 is issued per lane, and the two results are rounded back to f16
; and repacked.
; Each lane-1 operand has its own capture (FA1/FB1/FC1) so that the second
; fma.rn.f32 line only refers to values defined inside this function.
; CHECK-LABEL: test_fmuladd(
; CHECK-DAG:  ld.param.b32    [[A:%hh[0-9]+]], [test_fmuladd_param_0];
; CHECK-DAG:  ld.param.b32    [[B:%hh[0-9]+]], [test_fmuladd_param_1];
; CHECK-DAG:  ld.param.b32    [[C:%hh[0-9]+]], [test_fmuladd_param_2];
;
; CHECK-F16:        fma.rn.f16x2   [[R:%hh[0-9]+]], [[A]], [[B]], [[C]];
;
; CHECK-NOF16-DAG:  mov.b32        {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]
; CHECK-NOF16-DAG:  mov.b32        {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]]
; CHECK-NOF16-DAG:  mov.b32        {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA0:%f[0-9]+]], [[A0]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB0:%f[0-9]+]], [[B0]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FC0:%f[0-9]+]], [[C0]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FA1:%f[0-9]+]], [[A1]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FB1:%f[0-9]+]], [[B1]]
; CHECK-NOF16-DAG:  cvt.f32.f16    [[FC1:%f[0-9]+]], [[C1]]
; CHECK-NOF16-DAG:  fma.rn.f32     [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]];
; CHECK-NOF16-DAG:  fma.rn.f32     [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]];
; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]
; CHECK-NOF16-DAG:  cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]
; CHECK-NOF16:      mov.b32        [[R:%hh[0-9]+]], {[[R0]], [[R1]]}
;
; CHECK:      st.param.b32    [func_retval0+0], [[R]];
; CHECK:      ret;
define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
  %r = call <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
  ret <2 x half> %r
}

; Swapping the two lanes with shufflevector (mask <1, 0>) is expected to unpack
; the input register and repack its halves in the opposite order. The checks
; match literal register names.
; CHECK-LABEL: test_shufflevector(
; CHECK:      mov.b32 {%h1, %h2}, %hh1;
; CHECK:      mov.b32 %hh2, {%h2, %h1};
define <2 x half> @test_shufflevector(<2 x half> %a) #0 {
  %s = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x half> %s
}

; Overwriting lane 1 with insertelement is expected to keep only the low half
; of the input (the high half goes to the %tmp_hi sink) and pack it together
; with %h1, which presumably holds the scalar argument %x. The checks match
; literal register names.
; CHECK-LABEL: test_insertelement(
; CHECK:      mov.b32 {%h2, %tmp_hi}, %hh1;
; CHECK:      mov.b32 %hh2, {%h2, %h1};
define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 {
  %i = insertelement <2 x half> %a, half %x, i64 1
  ret <2 x half> %i
}
; Attribute group #0: attached to the test functions above.
attributes #0 = { nounwind }
; Attribute group #1: sets the "unsafe-fp-math" string attribute to "true".
; It is not referenced in the nearby tests; presumably used elsewhere in the file.
attributes #1 = { "unsafe-fp-math"="true" }