; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s

; Each kernel below performs one integer division or remainder and stores the
; result; the CHECK lines pin the IR that the RUN line's opt invocation emits
; for it. Do not hand-edit the CHECK lines: regenerate them with
; utils/update_test_checks.py.

; i32 udiv: the checks expect a float reciprocal of %y scaled by
; 0x41F0000000000000 (2^32), two 64-bit multiply refinement steps, then a
; quotient fix-up of +1 / -1 chosen by the remainder comparisons.
define amdgpu_kernel void @udiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; CHECK-LABEL: @udiv_i32(
; CHECK-NEXT:    [[TMP1:%.*]] = uitofp i32 [[Y:%.*]] to float
; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast float 1.000000e+00, [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast float [[TMP2]], 0x41F0000000000000
; CHECK-NEXT:    [[TMP4:%.*]] = fptoui float [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = zext i32 [[Y]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP5]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; CHECK-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP7]], 32
; CHECK-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = sub i32 0, [[TMP8]]
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[TMP10]], 0
; CHECK-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP14]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i64 [[TMP16]] to i32
; CHECK-NEXT:    [[TMP18:%.*]] = lshr i64 [[TMP16]], 32
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32
; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[TMP4]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = sub i32 [[TMP4]], [[TMP19]]
; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP12]], i32 [[TMP20]], i32 [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP22]] to i64
; CHECK-NEXT:    [[TMP24:%.*]] = zext i32 [[X:%.*]] to i64
; CHECK-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP23]], [[TMP24]]
; CHECK-NEXT:    [[TMP26:%.*]] = trunc i64 [[TMP25]] to i32
; CHECK-NEXT:    [[TMP27:%.*]] = lshr i64 [[TMP25]], 32
; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
; CHECK-NEXT:    [[TMP29:%.*]] = mul i32 [[TMP28]], [[Y]]
; CHECK-NEXT:    [[TMP30:%.*]] = sub i32 [[X]], [[TMP29]]
; CHECK-NEXT:    [[TMP31:%.*]] = icmp uge i32 [[TMP30]], [[Y]]
; CHECK-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP33:%.*]] = icmp uge i32 [[X]], [[TMP29]]
; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP35:%.*]] = and i32 [[TMP32]], [[TMP34]]
; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], 0
; CHECK-NEXT:    [[TMP37:%.*]] = add i32 [[TMP28]], 1
; CHECK-NEXT:    [[TMP38:%.*]] = sub i32 [[TMP28]], 1
; CHECK-NEXT:    [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP28]], i32 [[TMP37]]
; CHECK-NEXT:    [[TMP40:%.*]] = select i1 [[TMP33]], i32 [[TMP39]], i32 [[TMP38]]
; CHECK-NEXT:    store i32 [[TMP40]], i32 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = udiv i32 %x, %y
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; i32 urem: same quotient estimate as @udiv_i32; the final selects pick the
; remainder, the remainder minus %y, or the remainder plus %y.
define amdgpu_kernel void @urem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; CHECK-LABEL: @urem_i32(
; CHECK-NEXT:    [[TMP1:%.*]] = uitofp i32 [[Y:%.*]] to float
; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast float 1.000000e+00, [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast float [[TMP2]], 0x41F0000000000000
; CHECK-NEXT:    [[TMP4:%.*]] = fptoui float [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = zext i32 [[Y]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP5]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; CHECK-NEXT:    [[TMP9:%.*]] = lshr i64 [[TMP7]], 32
; CHECK-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = sub i32 0, [[TMP8]]
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[TMP10]], 0
; CHECK-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
; CHECK-NEXT:    [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP14]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i64 [[TMP16]] to i32
; CHECK-NEXT:    [[TMP18:%.*]] = lshr i64 [[TMP16]], 32
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32
; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[TMP4]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = sub i32 [[TMP4]], [[TMP19]]
; CHECK-NEXT:    [[TMP22:%.*]] = select i1 [[TMP12]], i32 [[TMP20]], i32 [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP22]] to i64
; CHECK-NEXT:    [[TMP24:%.*]] = zext i32 [[X:%.*]] to i64
; CHECK-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP23]], [[TMP24]]
; CHECK-NEXT:    [[TMP26:%.*]] = trunc i64 [[TMP25]] to i32
; CHECK-NEXT:    [[TMP27:%.*]] = lshr i64 [[TMP25]], 32
; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
; CHECK-NEXT:    [[TMP29:%.*]] = mul i32 [[TMP28]], [[Y]]
; CHECK-NEXT:    [[TMP30:%.*]] = sub i32 [[X]], [[TMP29]]
; CHECK-NEXT:    [[TMP31:%.*]] = icmp uge i32 [[TMP30]], [[Y]]
; CHECK-NEXT:    [[TMP32:%.*]] = select i1 [[TMP31]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP33:%.*]] = icmp uge i32 [[X]], [[TMP29]]
; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP35:%.*]] = and i32 [[TMP32]], [[TMP34]]
; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[TMP35]], 0
; CHECK-NEXT:    [[TMP37:%.*]] = sub i32 [[TMP30]], [[Y]]
; CHECK-NEXT:    [[TMP38:%.*]] = add i32 [[TMP30]], [[Y]]
; CHECK-NEXT:    [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP30]], i32 [[TMP37]]
; CHECK-NEXT:    [[TMP40:%.*]] = select i1 [[TMP33]], i32 [[TMP39]], i32 [[TMP38]]
; CHECK-NEXT:    store i32 [[TMP40]], i32 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = urem i32 %x, %y
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; i32 sdiv: the checks expect both operands to be made non-negative with the
; ashr/add/xor idiom, the unsigned sequence to run on those values, and the
; quotient's sign to be re-applied with xor/sub of the combined sign mask.
define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; CHECK-LABEL: @sdiv_i32(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31
; CHECK-NEXT:    [[TMP2:%.*]] = ashr i32 [[Y:%.*]], 31
; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[X]], [[TMP1]]
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[Y]], [[TMP2]]
; CHECK-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP4]], [[TMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = xor i32 [[TMP5]], [[TMP2]]
; CHECK-NEXT:    [[TMP8:%.*]] = uitofp i32 [[TMP7]] to float
; CHECK-NEXT:    [[TMP9:%.*]] = fdiv fast float 1.000000e+00, [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = fmul fast float [[TMP9]], 0x41F0000000000000
; CHECK-NEXT:    [[TMP11:%.*]] = fptoui float [[TMP10]] to i32
; CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP12]], [[TMP13]]
; CHECK-NEXT:    [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
; CHECK-NEXT:    [[TMP16:%.*]] = lshr i64 [[TMP14]], 32
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i64 [[TMP16]] to i32
; CHECK-NEXT:    [[TMP18:%.*]] = sub i32 0, [[TMP15]]
; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[TMP17]], 0
; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 [[TMP15]]
; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
; CHECK-NEXT:    [[TMP22:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP21]], [[TMP22]]
; CHECK-NEXT:    [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
; CHECK-NEXT:    [[TMP25:%.*]] = lshr i64 [[TMP23]], 32
; CHECK-NEXT:    [[TMP26:%.*]] = trunc i64 [[TMP25]] to i32
; CHECK-NEXT:    [[TMP27:%.*]] = add i32 [[TMP11]], [[TMP26]]
; CHECK-NEXT:    [[TMP28:%.*]] = sub i32 [[TMP11]], [[TMP26]]
; CHECK-NEXT:    [[TMP29:%.*]] = select i1 [[TMP19]], i32 [[TMP27]], i32 [[TMP28]]
; CHECK-NEXT:    [[TMP30:%.*]] = zext i32 [[TMP29]] to i64
; CHECK-NEXT:    [[TMP31:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT:    [[TMP32:%.*]] = mul i64 [[TMP30]], [[TMP31]]
; CHECK-NEXT:    [[TMP33:%.*]] = trunc i64 [[TMP32]] to i32
; CHECK-NEXT:    [[TMP34:%.*]] = lshr i64 [[TMP32]], 32
; CHECK-NEXT:    [[TMP35:%.*]] = trunc i64 [[TMP34]] to i32
; CHECK-NEXT:    [[TMP36:%.*]] = mul i32 [[TMP35]], [[TMP7]]
; CHECK-NEXT:    [[TMP37:%.*]] = sub i32 [[TMP6]], [[TMP36]]
; CHECK-NEXT:    [[TMP38:%.*]] = icmp uge i32 [[TMP37]], [[TMP7]]
; CHECK-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP40:%.*]] = icmp uge i32 [[TMP6]], [[TMP36]]
; CHECK-NEXT:    [[TMP41:%.*]] = select i1 [[TMP40]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP42:%.*]] = and i32 [[TMP39]], [[TMP41]]
; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[TMP42]], 0
; CHECK-NEXT:    [[TMP44:%.*]] = add i32 [[TMP35]], 1
; CHECK-NEXT:    [[TMP45:%.*]] = sub i32 [[TMP35]], 1
; CHECK-NEXT:    [[TMP46:%.*]] = select i1 [[TMP43]], i32 [[TMP35]], i32 [[TMP44]]
; CHECK-NEXT:    [[TMP47:%.*]] = select i1 [[TMP40]], i32 [[TMP46]], i32 [[TMP45]]
; CHECK-NEXT:    [[TMP48:%.*]] = xor i32 [[TMP47]], [[TMP3]]
; CHECK-NEXT:    [[TMP49:%.*]] = sub i32 [[TMP48]], [[TMP3]]
; CHECK-NEXT:    store i32 [[TMP49]], i32 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = sdiv i32 %x, %y
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; i32 srem: like @sdiv_i32, but the final xor/sub uses the sign mask of %x
; alone, and the selects choose among remainder candidates.
define amdgpu_kernel void @srem_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; CHECK-LABEL: @srem_i32(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31
; CHECK-NEXT:    [[TMP2:%.*]] = ashr i32 [[Y:%.*]], 31
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[X]], [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[Y]], [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP1]]
; CHECK-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP4]], [[TMP2]]
; CHECK-NEXT:    [[TMP7:%.*]] = uitofp i32 [[TMP6]] to float
; CHECK-NEXT:    [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP8]], 0x41F0000000000000
; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP9]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
; CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i64 [[TMP13]] to i32
; CHECK-NEXT:    [[TMP15:%.*]] = lshr i64 [[TMP13]], 32
; CHECK-NEXT:    [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32
; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 0, [[TMP14]]
; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i32 [[TMP16]], 0
; CHECK-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP14]]
; CHECK-NEXT:    [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP10]] to i64
; CHECK-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP20]], [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = trunc i64 [[TMP22]] to i32
; CHECK-NEXT:    [[TMP24:%.*]] = lshr i64 [[TMP22]], 32
; CHECK-NEXT:    [[TMP25:%.*]] = trunc i64 [[TMP24]] to i32
; CHECK-NEXT:    [[TMP26:%.*]] = add i32 [[TMP10]], [[TMP25]]
; CHECK-NEXT:    [[TMP27:%.*]] = sub i32 [[TMP10]], [[TMP25]]
; CHECK-NEXT:    [[TMP28:%.*]] = select i1 [[TMP18]], i32 [[TMP26]], i32 [[TMP27]]
; CHECK-NEXT:    [[TMP29:%.*]] = zext i32 [[TMP28]] to i64
; CHECK-NEXT:    [[TMP30:%.*]] = zext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP31:%.*]] = mul i64 [[TMP29]], [[TMP30]]
; CHECK-NEXT:    [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32
; CHECK-NEXT:    [[TMP33:%.*]] = lshr i64 [[TMP31]], 32
; CHECK-NEXT:    [[TMP34:%.*]] = trunc i64 [[TMP33]] to i32
; CHECK-NEXT:    [[TMP35:%.*]] = mul i32 [[TMP34]], [[TMP6]]
; CHECK-NEXT:    [[TMP36:%.*]] = sub i32 [[TMP5]], [[TMP35]]
; CHECK-NEXT:    [[TMP37:%.*]] = icmp uge i32 [[TMP36]], [[TMP6]]
; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP39:%.*]] = icmp uge i32 [[TMP5]], [[TMP35]]
; CHECK-NEXT:    [[TMP40:%.*]] = select i1 [[TMP39]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP41:%.*]] = and i32 [[TMP38]], [[TMP40]]
; CHECK-NEXT:    [[TMP42:%.*]] = icmp eq i32 [[TMP41]], 0
; CHECK-NEXT:    [[TMP43:%.*]] = sub i32 [[TMP36]], [[TMP6]]
; CHECK-NEXT:    [[TMP44:%.*]] = add i32 [[TMP36]], [[TMP6]]
; CHECK-NEXT:    [[TMP45:%.*]] = select i1 [[TMP42]], i32 [[TMP36]], i32 [[TMP43]]
; CHECK-NEXT:    [[TMP46:%.*]] = select i1 [[TMP39]], i32 [[TMP45]], i32 [[TMP44]]
; CHECK-NEXT:    [[TMP47:%.*]] = xor i32 [[TMP46]], [[TMP1]]
; CHECK-NEXT:    [[TMP48:%.*]] = sub i32 [[TMP47]], [[TMP1]]
; CHECK-NEXT:    store i32 [[TMP48]], i32 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = srem i32 %x, %y
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; i16 udiv: the checks expect the operands zero-extended to i32, a float
; reciprocal multiply truncated to an integer quotient, and a single +1
; correction decided by the llvm.amdgcn.fmad.ftz residual.
define amdgpu_kernel void @udiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) {
; CHECK-LABEL: @udiv_i16(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP4:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 1, i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP10]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = and i32 [[TMP15]], 65535
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16
; CHECK-NEXT:    store i16 [[TMP17]], i16 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = udiv i16 %x, %y
  store i16 %r, i16 addrspace(1)* %out
  ret void
}

; i16 urem: same quotient as @udiv_i16, then remainder = x - quotient * y,
; masked to 16 bits.
define amdgpu_kernel void @urem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) {
; CHECK-LABEL: @urem_i16(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP4:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 1, i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP10]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP15]], [[TMP2]]
; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 [[TMP1]], [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = and i32 [[TMP17]], 65535
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16
; CHECK-NEXT:    store i16 [[TMP19]], i16 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = urem i16 %x, %y
  store i16 %r, i16 addrspace(1)* %out
  ret void
}

; i16 sdiv: the checks expect sign-extended operands, a correction term of
; ((x ^ y) >> 30) | 1 (i.e. +1 or -1), and the same float reciprocal sequence
; using signed conversions.
define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) {
; CHECK-LABEL: @sdiv_i16(
; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = sext i16 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16
; CHECK-NEXT:    [[TMP20:%.*]] = sext i16 [[TMP19]] to i32
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
; CHECK-NEXT:    store i16 [[TMP21]], i16 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = sdiv i16 %x, %y
  store i16 %r, i16 addrspace(1)* %out
  ret void
}

; i16 srem: same quotient as @sdiv_i16, then remainder = x - quotient * y.
define amdgpu_kernel void @srem_i16(i16 addrspace(1)* %out, i16 %x, i16 %y) {
; CHECK-LABEL: @srem_i16(
; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = sext i16 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = mul i32 [[TMP18]], [[TMP2]]
; CHECK-NEXT:    [[TMP20:%.*]] = sub i32 [[TMP1]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
; CHECK-NEXT:    [[TMP22:%.*]] = sext i16 [[TMP21]] to i32
; CHECK-NEXT:    [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16
; CHECK-NEXT:    store i16 [[TMP23]], i16 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = srem i16 %x, %y
  store i16 %r, i16 addrspace(1)* %out
  ret void
}

; i8 udiv: same shape as @udiv_i16 with an 8-bit result mask (255).
define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) {
; CHECK-LABEL: @udiv_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP4:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 1, i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP10]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = and i32 [[TMP15]], 255
; CHECK-NEXT:    [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
; CHECK-NEXT:    store i8 [[TMP17]], i8 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = udiv i8 %x, %y
  store i8 %r, i8 addrspace(1)* %out
  ret void
}

; i8 urem: same quotient as @udiv_i8, then remainder = x - quotient * y,
; masked to 8 bits.
define amdgpu_kernel void @urem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) {
; CHECK-LABEL: @urem_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP4:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]])
; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]])
; CHECK-NEXT:    [[TMP10:%.*]] = fptoui float [[TMP7]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]])
; CHECK-NEXT:    [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 1, i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP10]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP15]], [[TMP2]]
; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 [[TMP1]], [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = and i32 [[TMP17]], 255
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i32 [[TMP18]] to i8
; CHECK-NEXT:    store i8 [[TMP19]], i8 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = urem i8 %x, %y
  store i8 %r, i8 addrspace(1)* %out
  ret void
}

; i8 sdiv: same shape as @sdiv_i16 on sign-extended i8 operands.
define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) {
; CHECK-LABEL: @sdiv_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = sext i8 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i32 [[TMP18]] to i8
; CHECK-NEXT:    [[TMP20:%.*]] = sext i8 [[TMP19]] to i32
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i8
; CHECK-NEXT:    store i8 [[TMP21]], i8 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = sdiv i8 %x, %y
  store i8 %r, i8 addrspace(1)* %out
  ret void
}

; i8 srem: same quotient as @sdiv_i8, then remainder = x - quotient * y.
define amdgpu_kernel void @srem_i8(i8 addrspace(1)* %out, i8 %x, i8 %y) {
; CHECK-LABEL: @srem_i8(
; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = sext i8 [[Y:%.*]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = ashr i32 [[TMP3]], 30
; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float
; CHECK-NEXT:    [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]])
; CHECK-NEXT:    [[TMP13:%.*]] = fptosi float [[TMP10]] to i32
; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]])
; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]])
; CHECK-NEXT:    [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]]
; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = mul i32 [[TMP18]], [[TMP2]]
; CHECK-NEXT:    [[TMP20:%.*]] = sub i32 [[TMP1]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i8
; CHECK-NEXT:    [[TMP22:%.*]] = sext i8 [[TMP21]] to i32
; CHECK-NEXT:    [[TMP23:%.*]] = trunc i32 [[TMP22]] to i8
; CHECK-NEXT:    store i8 [[TMP23]], i8 addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = srem i8 %x, %y
  store i8 %r, i8 addrspace(1)* %out
  ret void
}

; <4 x i32> udiv: the checks expect the vector to be scalarized -- each lane is
; extracted, run through the same sequence as @udiv_i32, and re-inserted.
define amdgpu_kernel void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @udiv_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP4:%.*]] = fdiv fast float 1.000000e+00, [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41F0000000000000
; CHECK-NEXT:    [[TMP6:%.*]] = fptoui float [[TMP5]] to i32
; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP7]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = lshr i64 [[TMP9]], 32
; CHECK-NEXT:    [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32
; CHECK-NEXT:    [[TMP13:%.*]] = sub i32 0, [[TMP10]]
; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP12]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP10]]
; CHECK-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
; CHECK-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[TMP16]], [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32
; CHECK-NEXT:    [[TMP20:%.*]] = lshr i64 [[TMP18]], 32
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32
; CHECK-NEXT:    [[TMP22:%.*]] = add i32 [[TMP6]], [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = sub i32 [[TMP6]], [[TMP21]]
; CHECK-NEXT:    [[TMP24:%.*]] = select i1 [[TMP14]], i32 [[TMP22]], i32 [[TMP23]]
; CHECK-NEXT:    [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
; CHECK-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP27:%.*]] = mul i64 [[TMP25]], [[TMP26]]
; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32
; CHECK-NEXT:    [[TMP29:%.*]] = lshr i64 [[TMP27]], 32
; CHECK-NEXT:    [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32
; CHECK-NEXT:    [[TMP31:%.*]] = mul i32 [[TMP30]], [[TMP2]]
; CHECK-NEXT:    [[TMP32:%.*]] = sub i32 [[TMP1]], [[TMP31]]
; CHECK-NEXT:    [[TMP33:%.*]] = icmp uge i32 [[TMP32]], [[TMP2]]
; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP35:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]]
; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP37:%.*]] = and i32 [[TMP34]], [[TMP36]]
; CHECK-NEXT:    [[TMP38:%.*]] = icmp eq i32 [[TMP37]], 0
; CHECK-NEXT:    [[TMP39:%.*]] = add i32 [[TMP30]], 1
; CHECK-NEXT:    [[TMP40:%.*]] = sub i32 [[TMP30]], 1
; CHECK-NEXT:    [[TMP41:%.*]] = select i1 [[TMP38]], i32 [[TMP30]], i32 [[TMP39]]
; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP35]], i32 [[TMP41]], i32 [[TMP40]]
; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <4 x i32> undef, i32 [[TMP42]], i64 0
; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <4 x i32> [[X]], i64 1
; CHECK-NEXT:    [[TMP45:%.*]] = extractelement <4 x i32> [[Y]], i64 1
; CHECK-NEXT:    [[TMP46:%.*]] = uitofp i32 [[TMP45]] to float
; CHECK-NEXT:    [[TMP47:%.*]] = fdiv fast float 1.000000e+00, [[TMP46]]
; CHECK-NEXT:    [[TMP48:%.*]] = fmul fast float [[TMP47]], 0x41F0000000000000
; CHECK-NEXT:    [[TMP49:%.*]] = fptoui float [[TMP48]] to i32
; CHECK-NEXT:    [[TMP50:%.*]] = zext i32 [[TMP49]] to i64
; CHECK-NEXT:    [[TMP51:%.*]] = zext i32 [[TMP45]] to i64
; CHECK-NEXT:    [[TMP52:%.*]] = mul i64 [[TMP50]], [[TMP51]]
; CHECK-NEXT:    [[TMP53:%.*]] = trunc i64 [[TMP52]] to i32
; CHECK-NEXT:    [[TMP54:%.*]] = lshr i64 [[TMP52]], 32
; CHECK-NEXT:    [[TMP55:%.*]] = trunc i64 [[TMP54]] to i32
; CHECK-NEXT:    [[TMP56:%.*]] = sub i32 0, [[TMP53]]
; CHECK-NEXT:    [[TMP57:%.*]] = icmp eq i32 [[TMP55]], 0
; CHECK-NEXT:    [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 [[TMP53]]
; CHECK-NEXT:    [[TMP59:%.*]] = zext i32 [[TMP58]] to i64
; CHECK-NEXT:    [[TMP60:%.*]] = zext i32 [[TMP49]] to i64
; CHECK-NEXT:    [[TMP61:%.*]] = mul i64 [[TMP59]], [[TMP60]]
; CHECK-NEXT:    [[TMP62:%.*]] = trunc i64 [[TMP61]] to i32
; CHECK-NEXT:    [[TMP63:%.*]] = lshr i64 [[TMP61]], 32
; CHECK-NEXT:    [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32
; CHECK-NEXT:    [[TMP65:%.*]] = add i32 [[TMP49]], [[TMP64]]
; CHECK-NEXT:    [[TMP66:%.*]] = sub i32 [[TMP49]], [[TMP64]]
; CHECK-NEXT:    [[TMP67:%.*]] = select i1 [[TMP57]], i32 [[TMP65]], i32 [[TMP66]]
; CHECK-NEXT:    [[TMP68:%.*]] = zext i32 [[TMP67]] to i64
; CHECK-NEXT:    [[TMP69:%.*]] = zext i32 [[TMP44]] to i64
; CHECK-NEXT:    [[TMP70:%.*]] = mul i64 [[TMP68]], [[TMP69]]
; CHECK-NEXT:    [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32
; CHECK-NEXT:    [[TMP72:%.*]] = lshr i64 [[TMP70]], 32
; CHECK-NEXT:    [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32
; CHECK-NEXT:    [[TMP74:%.*]] = mul i32 [[TMP73]], [[TMP45]]
; CHECK-NEXT:    [[TMP75:%.*]] = sub i32 [[TMP44]], [[TMP74]]
; CHECK-NEXT:    [[TMP76:%.*]] = icmp uge i32 [[TMP75]], [[TMP45]]
; CHECK-NEXT:    [[TMP77:%.*]] = select i1 [[TMP76]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP78:%.*]] = icmp uge i32 [[TMP44]], [[TMP74]]
; CHECK-NEXT:    [[TMP79:%.*]] = select i1 [[TMP78]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP80:%.*]] = and i32 [[TMP77]], [[TMP79]]
; CHECK-NEXT:    [[TMP81:%.*]] = icmp eq i32 [[TMP80]], 0
; CHECK-NEXT:    [[TMP82:%.*]] = add i32 [[TMP73]], 1
; CHECK-NEXT:    [[TMP83:%.*]] = sub i32 [[TMP73]], 1
; CHECK-NEXT:    [[TMP84:%.*]] = select i1 [[TMP81]], i32 [[TMP73]], i32 [[TMP82]]
; CHECK-NEXT:    [[TMP85:%.*]] = select i1 [[TMP78]], i32 [[TMP84]], i32 [[TMP83]]
; CHECK-NEXT:    [[TMP86:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP85]], i64 1
; CHECK-NEXT:    [[TMP87:%.*]] = extractelement <4 x i32> [[X]], i64 2
; CHECK-NEXT:    [[TMP88:%.*]] = extractelement <4 x i32> [[Y]], i64 2
; CHECK-NEXT:    [[TMP89:%.*]] = uitofp i32 [[TMP88]] to float
; CHECK-NEXT:    [[TMP90:%.*]] = fdiv fast float 1.000000e+00, [[TMP89]]
; CHECK-NEXT:    [[TMP91:%.*]] = fmul fast float [[TMP90]], 0x41F0000000000000
; CHECK-NEXT:    [[TMP92:%.*]] = fptoui float [[TMP91]] to i32
; CHECK-NEXT:    [[TMP93:%.*]] = zext i32 [[TMP92]] to i64
; CHECK-NEXT:    [[TMP94:%.*]] = zext i32 [[TMP88]] to i64
; CHECK-NEXT:    [[TMP95:%.*]] = mul i64 [[TMP93]], [[TMP94]]
; CHECK-NEXT:    [[TMP96:%.*]] = trunc i64 [[TMP95]] to i32
; CHECK-NEXT:    [[TMP97:%.*]] = lshr i64 [[TMP95]], 32
; CHECK-NEXT:    [[TMP98:%.*]] = trunc i64 [[TMP97]] to i32
; CHECK-NEXT:    [[TMP99:%.*]] = sub i32 0, [[TMP96]]
; CHECK-NEXT:    [[TMP100:%.*]] = icmp eq i32 [[TMP98]], 0
; CHECK-NEXT:    [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[TMP99]], i32 [[TMP96]]
; CHECK-NEXT:    [[TMP102:%.*]] = zext i32 [[TMP101]] to i64
; CHECK-NEXT:    [[TMP103:%.*]] = zext i32 [[TMP92]] to i64
; CHECK-NEXT:    [[TMP104:%.*]] = mul i64 [[TMP102]], [[TMP103]]
; CHECK-NEXT:    [[TMP105:%.*]] = trunc i64 [[TMP104]] to i32
; CHECK-NEXT:    [[TMP106:%.*]] = lshr i64 [[TMP104]], 32
; CHECK-NEXT:    [[TMP107:%.*]] = trunc i64 [[TMP106]] to i32
; CHECK-NEXT:    [[TMP108:%.*]] = add i32 [[TMP92]], [[TMP107]]
; CHECK-NEXT:    [[TMP109:%.*]] = sub i32 [[TMP92]], [[TMP107]]
; CHECK-NEXT:    [[TMP110:%.*]] = select i1 [[TMP100]], i32 [[TMP108]], i32 [[TMP109]]
; CHECK-NEXT:    [[TMP111:%.*]] = zext i32 [[TMP110]] to i64
; CHECK-NEXT:    [[TMP112:%.*]] = zext i32 [[TMP87]] to i64
; CHECK-NEXT:    [[TMP113:%.*]] = mul i64 [[TMP111]], [[TMP112]]
; CHECK-NEXT:    [[TMP114:%.*]] = trunc i64 [[TMP113]] to i32
; CHECK-NEXT:    [[TMP115:%.*]] = lshr i64 [[TMP113]], 32
; CHECK-NEXT:    [[TMP116:%.*]] = trunc i64 [[TMP115]] to i32
; CHECK-NEXT:    [[TMP117:%.*]] = mul i32 [[TMP116]], [[TMP88]]
; CHECK-NEXT:    [[TMP118:%.*]] = sub i32 [[TMP87]], [[TMP117]]
; CHECK-NEXT:    [[TMP119:%.*]] = icmp uge i32 [[TMP118]], [[TMP88]]
; CHECK-NEXT:    [[TMP120:%.*]] = select i1 [[TMP119]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP121:%.*]] = icmp uge i32 [[TMP87]], [[TMP117]]
; CHECK-NEXT:    [[TMP122:%.*]] = select i1 [[TMP121]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP123:%.*]] = and i32 [[TMP120]], [[TMP122]]
; CHECK-NEXT:    [[TMP124:%.*]] = icmp eq i32 [[TMP123]], 0
; CHECK-NEXT:    [[TMP125:%.*]] = add i32 [[TMP116]], 1
; CHECK-NEXT:    [[TMP126:%.*]] = sub i32 [[TMP116]], 1
; CHECK-NEXT:    [[TMP127:%.*]] = select i1 [[TMP124]], i32 [[TMP116]], i32 [[TMP125]]
; CHECK-NEXT:    [[TMP128:%.*]] = select i1 [[TMP121]], i32 [[TMP127]], i32 [[TMP126]]
; CHECK-NEXT:    [[TMP129:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP128]], i64 2
; CHECK-NEXT:    [[TMP130:%.*]] = extractelement <4 x i32> [[X]], i64 3
; CHECK-NEXT:    [[TMP131:%.*]] = extractelement <4 x i32> [[Y]], i64 3
; CHECK-NEXT:    [[TMP132:%.*]] = uitofp i32 [[TMP131]] to float
; CHECK-NEXT:    [[TMP133:%.*]] = fdiv fast float 1.000000e+00, [[TMP132]]
; CHECK-NEXT:    [[TMP134:%.*]] = fmul fast float [[TMP133]], 0x41F0000000000000
; CHECK-NEXT:    [[TMP135:%.*]] = fptoui float [[TMP134]] to i32
; CHECK-NEXT:    [[TMP136:%.*]] = zext i32 [[TMP135]] to i64
; CHECK-NEXT:    [[TMP137:%.*]] = zext i32 [[TMP131]] to i64
; CHECK-NEXT:    [[TMP138:%.*]] = mul i64 [[TMP136]], [[TMP137]]
; CHECK-NEXT:    [[TMP139:%.*]] = trunc i64 [[TMP138]] to i32
; CHECK-NEXT:    [[TMP140:%.*]] = lshr i64 [[TMP138]], 32
; CHECK-NEXT:    [[TMP141:%.*]] = trunc i64 [[TMP140]] to i32
; CHECK-NEXT:    [[TMP142:%.*]] = sub i32 0, [[TMP139]]
; CHECK-NEXT:    [[TMP143:%.*]] = icmp eq i32 [[TMP141]], 0
; CHECK-NEXT:    [[TMP144:%.*]] = select i1 [[TMP143]], i32 [[TMP142]], i32 [[TMP139]]
; CHECK-NEXT:    [[TMP145:%.*]] = zext i32 [[TMP144]] to i64
; CHECK-NEXT:    [[TMP146:%.*]] = zext i32 [[TMP135]] to i64
; CHECK-NEXT:    [[TMP147:%.*]] = mul i64 [[TMP145]], [[TMP146]]
; CHECK-NEXT:    [[TMP148:%.*]] = trunc i64 [[TMP147]] to i32
; CHECK-NEXT:    [[TMP149:%.*]] = lshr i64 [[TMP147]], 32
; CHECK-NEXT:    [[TMP150:%.*]] = trunc i64 [[TMP149]] to i32
; CHECK-NEXT:    [[TMP151:%.*]] = add i32 [[TMP135]], [[TMP150]]
; CHECK-NEXT:    [[TMP152:%.*]] = sub i32 [[TMP135]], [[TMP150]]
; CHECK-NEXT:    [[TMP153:%.*]] = select i1 [[TMP143]], i32 [[TMP151]], i32 [[TMP152]]
; CHECK-NEXT:    [[TMP154:%.*]] = zext i32 [[TMP153]] to i64
; CHECK-NEXT:    [[TMP155:%.*]] = zext i32 [[TMP130]] to i64
; CHECK-NEXT:    [[TMP156:%.*]] = mul i64 [[TMP154]], [[TMP155]]
; CHECK-NEXT:    [[TMP157:%.*]] = trunc i64 [[TMP156]] to i32
; CHECK-NEXT:    [[TMP158:%.*]] = lshr i64 [[TMP156]], 32
; CHECK-NEXT:    [[TMP159:%.*]] = trunc i64 [[TMP158]] to i32
; CHECK-NEXT:    [[TMP160:%.*]] = mul i32 [[TMP159]], [[TMP131]]
; CHECK-NEXT:    [[TMP161:%.*]] = sub i32 [[TMP130]], [[TMP160]]
; CHECK-NEXT:    [[TMP162:%.*]] = icmp uge i32 [[TMP161]], [[TMP131]]
; CHECK-NEXT:    [[TMP163:%.*]] = select i1 [[TMP162]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP164:%.*]] = icmp uge i32 [[TMP130]], [[TMP160]]
; CHECK-NEXT:    [[TMP165:%.*]] = select i1 [[TMP164]], i32 -1, i32 0
; CHECK-NEXT:    [[TMP166:%.*]] = and i32 [[TMP163]], [[TMP165]]
; CHECK-NEXT:    [[TMP167:%.*]] = icmp eq i32 [[TMP166]], 0
; CHECK-NEXT:    [[TMP168:%.*]] = add i32 [[TMP159]], 1
; CHECK-NEXT:    [[TMP169:%.*]] = sub i32 [[TMP159]], 1
; CHECK-NEXT:    [[TMP170:%.*]] = select i1 [[TMP167]], i32 [[TMP159]], i32 [[TMP168]]
; CHECK-NEXT:    [[TMP171:%.*]] = select i1 [[TMP164]], i32 [[TMP170]], i32 [[TMP169]]
; CHECK-NEXT:    [[TMP172:%.*]] = insertelement <4 x i32> [[TMP129]], i32 [[TMP171]], i64 3
; CHECK-NEXT:    store <4 x i32> [[TMP172]], <4 x i32> addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = udiv <4 x i32> %x, %y
  store <4 x i32> %r, <4 x i32> addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @urem_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float
; CHECK-NEXT:    [[TMP4:%.*]] = fdiv fast float 1.000000e+00, [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41F0000000000000
; CHECK-NEXT:    [[TMP6:%.*]] = fptoui float [[TMP5]] to i32
; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP7]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = trunc i64
[[TMP9]] to i32 655; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP9]], 32 656; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32 657; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP10]] 658; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP12]], 0 659; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP10]] 660; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 661; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP6]] to i64 662; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP16]], [[TMP17]] 663; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 664; CHECK-NEXT: [[TMP20:%.*]] = lshr i64 [[TMP18]], 32 665; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32 666; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP6]], [[TMP21]] 667; CHECK-NEXT: [[TMP23:%.*]] = sub i32 [[TMP6]], [[TMP21]] 668; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP14]], i32 [[TMP22]], i32 [[TMP23]] 669; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 670; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP1]] to i64 671; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP25]], [[TMP26]] 672; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 673; CHECK-NEXT: [[TMP29:%.*]] = lshr i64 [[TMP27]], 32 674; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 675; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], [[TMP2]] 676; CHECK-NEXT: [[TMP32:%.*]] = sub i32 [[TMP1]], [[TMP31]] 677; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[TMP32]], [[TMP2]] 678; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 679; CHECK-NEXT: [[TMP35:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] 680; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 -1, i32 0 681; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP34]], [[TMP36]] 682; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP37]], 0 683; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP32]], [[TMP2]] 684; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP32]], [[TMP2]] 685; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP38]], i32 [[TMP32]], i32 [[TMP39]] 686; CHECK-NEXT: [[TMP42:%.*]] = select i1 
[[TMP35]], i32 [[TMP41]], i32 [[TMP40]] 687; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> undef, i32 [[TMP42]], i64 0 688; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i32> [[X]], i64 1 689; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i32> [[Y]], i64 1 690; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP45]] to float 691; CHECK-NEXT: [[TMP47:%.*]] = fdiv fast float 1.000000e+00, [[TMP46]] 692; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP47]], 0x41F0000000000000 693; CHECK-NEXT: [[TMP49:%.*]] = fptoui float [[TMP48]] to i32 694; CHECK-NEXT: [[TMP50:%.*]] = zext i32 [[TMP49]] to i64 695; CHECK-NEXT: [[TMP51:%.*]] = zext i32 [[TMP45]] to i64 696; CHECK-NEXT: [[TMP52:%.*]] = mul i64 [[TMP50]], [[TMP51]] 697; CHECK-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i32 698; CHECK-NEXT: [[TMP54:%.*]] = lshr i64 [[TMP52]], 32 699; CHECK-NEXT: [[TMP55:%.*]] = trunc i64 [[TMP54]] to i32 700; CHECK-NEXT: [[TMP56:%.*]] = sub i32 0, [[TMP53]] 701; CHECK-NEXT: [[TMP57:%.*]] = icmp eq i32 [[TMP55]], 0 702; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 [[TMP53]] 703; CHECK-NEXT: [[TMP59:%.*]] = zext i32 [[TMP58]] to i64 704; CHECK-NEXT: [[TMP60:%.*]] = zext i32 [[TMP49]] to i64 705; CHECK-NEXT: [[TMP61:%.*]] = mul i64 [[TMP59]], [[TMP60]] 706; CHECK-NEXT: [[TMP62:%.*]] = trunc i64 [[TMP61]] to i32 707; CHECK-NEXT: [[TMP63:%.*]] = lshr i64 [[TMP61]], 32 708; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 709; CHECK-NEXT: [[TMP65:%.*]] = add i32 [[TMP49]], [[TMP64]] 710; CHECK-NEXT: [[TMP66:%.*]] = sub i32 [[TMP49]], [[TMP64]] 711; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP57]], i32 [[TMP65]], i32 [[TMP66]] 712; CHECK-NEXT: [[TMP68:%.*]] = zext i32 [[TMP67]] to i64 713; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP44]] to i64 714; CHECK-NEXT: [[TMP70:%.*]] = mul i64 [[TMP68]], [[TMP69]] 715; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 716; CHECK-NEXT: [[TMP72:%.*]] = lshr i64 [[TMP70]], 32 717; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 
[[TMP72]] to i32 718; CHECK-NEXT: [[TMP74:%.*]] = mul i32 [[TMP73]], [[TMP45]] 719; CHECK-NEXT: [[TMP75:%.*]] = sub i32 [[TMP44]], [[TMP74]] 720; CHECK-NEXT: [[TMP76:%.*]] = icmp uge i32 [[TMP75]], [[TMP45]] 721; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], i32 -1, i32 0 722; CHECK-NEXT: [[TMP78:%.*]] = icmp uge i32 [[TMP44]], [[TMP74]] 723; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 -1, i32 0 724; CHECK-NEXT: [[TMP80:%.*]] = and i32 [[TMP77]], [[TMP79]] 725; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i32 [[TMP80]], 0 726; CHECK-NEXT: [[TMP82:%.*]] = sub i32 [[TMP75]], [[TMP45]] 727; CHECK-NEXT: [[TMP83:%.*]] = add i32 [[TMP75]], [[TMP45]] 728; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP81]], i32 [[TMP75]], i32 [[TMP82]] 729; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP78]], i32 [[TMP84]], i32 [[TMP83]] 730; CHECK-NEXT: [[TMP86:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP85]], i64 1 731; CHECK-NEXT: [[TMP87:%.*]] = extractelement <4 x i32> [[X]], i64 2 732; CHECK-NEXT: [[TMP88:%.*]] = extractelement <4 x i32> [[Y]], i64 2 733; CHECK-NEXT: [[TMP89:%.*]] = uitofp i32 [[TMP88]] to float 734; CHECK-NEXT: [[TMP90:%.*]] = fdiv fast float 1.000000e+00, [[TMP89]] 735; CHECK-NEXT: [[TMP91:%.*]] = fmul fast float [[TMP90]], 0x41F0000000000000 736; CHECK-NEXT: [[TMP92:%.*]] = fptoui float [[TMP91]] to i32 737; CHECK-NEXT: [[TMP93:%.*]] = zext i32 [[TMP92]] to i64 738; CHECK-NEXT: [[TMP94:%.*]] = zext i32 [[TMP88]] to i64 739; CHECK-NEXT: [[TMP95:%.*]] = mul i64 [[TMP93]], [[TMP94]] 740; CHECK-NEXT: [[TMP96:%.*]] = trunc i64 [[TMP95]] to i32 741; CHECK-NEXT: [[TMP97:%.*]] = lshr i64 [[TMP95]], 32 742; CHECK-NEXT: [[TMP98:%.*]] = trunc i64 [[TMP97]] to i32 743; CHECK-NEXT: [[TMP99:%.*]] = sub i32 0, [[TMP96]] 744; CHECK-NEXT: [[TMP100:%.*]] = icmp eq i32 [[TMP98]], 0 745; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[TMP99]], i32 [[TMP96]] 746; CHECK-NEXT: [[TMP102:%.*]] = zext i32 [[TMP101]] to i64 747; CHECK-NEXT: [[TMP103:%.*]] = zext i32 [[TMP92]] 
to i64 748; CHECK-NEXT: [[TMP104:%.*]] = mul i64 [[TMP102]], [[TMP103]] 749; CHECK-NEXT: [[TMP105:%.*]] = trunc i64 [[TMP104]] to i32 750; CHECK-NEXT: [[TMP106:%.*]] = lshr i64 [[TMP104]], 32 751; CHECK-NEXT: [[TMP107:%.*]] = trunc i64 [[TMP106]] to i32 752; CHECK-NEXT: [[TMP108:%.*]] = add i32 [[TMP92]], [[TMP107]] 753; CHECK-NEXT: [[TMP109:%.*]] = sub i32 [[TMP92]], [[TMP107]] 754; CHECK-NEXT: [[TMP110:%.*]] = select i1 [[TMP100]], i32 [[TMP108]], i32 [[TMP109]] 755; CHECK-NEXT: [[TMP111:%.*]] = zext i32 [[TMP110]] to i64 756; CHECK-NEXT: [[TMP112:%.*]] = zext i32 [[TMP87]] to i64 757; CHECK-NEXT: [[TMP113:%.*]] = mul i64 [[TMP111]], [[TMP112]] 758; CHECK-NEXT: [[TMP114:%.*]] = trunc i64 [[TMP113]] to i32 759; CHECK-NEXT: [[TMP115:%.*]] = lshr i64 [[TMP113]], 32 760; CHECK-NEXT: [[TMP116:%.*]] = trunc i64 [[TMP115]] to i32 761; CHECK-NEXT: [[TMP117:%.*]] = mul i32 [[TMP116]], [[TMP88]] 762; CHECK-NEXT: [[TMP118:%.*]] = sub i32 [[TMP87]], [[TMP117]] 763; CHECK-NEXT: [[TMP119:%.*]] = icmp uge i32 [[TMP118]], [[TMP88]] 764; CHECK-NEXT: [[TMP120:%.*]] = select i1 [[TMP119]], i32 -1, i32 0 765; CHECK-NEXT: [[TMP121:%.*]] = icmp uge i32 [[TMP87]], [[TMP117]] 766; CHECK-NEXT: [[TMP122:%.*]] = select i1 [[TMP121]], i32 -1, i32 0 767; CHECK-NEXT: [[TMP123:%.*]] = and i32 [[TMP120]], [[TMP122]] 768; CHECK-NEXT: [[TMP124:%.*]] = icmp eq i32 [[TMP123]], 0 769; CHECK-NEXT: [[TMP125:%.*]] = sub i32 [[TMP118]], [[TMP88]] 770; CHECK-NEXT: [[TMP126:%.*]] = add i32 [[TMP118]], [[TMP88]] 771; CHECK-NEXT: [[TMP127:%.*]] = select i1 [[TMP124]], i32 [[TMP118]], i32 [[TMP125]] 772; CHECK-NEXT: [[TMP128:%.*]] = select i1 [[TMP121]], i32 [[TMP127]], i32 [[TMP126]] 773; CHECK-NEXT: [[TMP129:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP128]], i64 2 774; CHECK-NEXT: [[TMP130:%.*]] = extractelement <4 x i32> [[X]], i64 3 775; CHECK-NEXT: [[TMP131:%.*]] = extractelement <4 x i32> [[Y]], i64 3 776; CHECK-NEXT: [[TMP132:%.*]] = uitofp i32 [[TMP131]] to float 777; CHECK-NEXT: 
[[TMP133:%.*]] = fdiv fast float 1.000000e+00, [[TMP132]] 778; CHECK-NEXT: [[TMP134:%.*]] = fmul fast float [[TMP133]], 0x41F0000000000000 779; CHECK-NEXT: [[TMP135:%.*]] = fptoui float [[TMP134]] to i32 780; CHECK-NEXT: [[TMP136:%.*]] = zext i32 [[TMP135]] to i64 781; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP131]] to i64 782; CHECK-NEXT: [[TMP138:%.*]] = mul i64 [[TMP136]], [[TMP137]] 783; CHECK-NEXT: [[TMP139:%.*]] = trunc i64 [[TMP138]] to i32 784; CHECK-NEXT: [[TMP140:%.*]] = lshr i64 [[TMP138]], 32 785; CHECK-NEXT: [[TMP141:%.*]] = trunc i64 [[TMP140]] to i32 786; CHECK-NEXT: [[TMP142:%.*]] = sub i32 0, [[TMP139]] 787; CHECK-NEXT: [[TMP143:%.*]] = icmp eq i32 [[TMP141]], 0 788; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP143]], i32 [[TMP142]], i32 [[TMP139]] 789; CHECK-NEXT: [[TMP145:%.*]] = zext i32 [[TMP144]] to i64 790; CHECK-NEXT: [[TMP146:%.*]] = zext i32 [[TMP135]] to i64 791; CHECK-NEXT: [[TMP147:%.*]] = mul i64 [[TMP145]], [[TMP146]] 792; CHECK-NEXT: [[TMP148:%.*]] = trunc i64 [[TMP147]] to i32 793; CHECK-NEXT: [[TMP149:%.*]] = lshr i64 [[TMP147]], 32 794; CHECK-NEXT: [[TMP150:%.*]] = trunc i64 [[TMP149]] to i32 795; CHECK-NEXT: [[TMP151:%.*]] = add i32 [[TMP135]], [[TMP150]] 796; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP135]], [[TMP150]] 797; CHECK-NEXT: [[TMP153:%.*]] = select i1 [[TMP143]], i32 [[TMP151]], i32 [[TMP152]] 798; CHECK-NEXT: [[TMP154:%.*]] = zext i32 [[TMP153]] to i64 799; CHECK-NEXT: [[TMP155:%.*]] = zext i32 [[TMP130]] to i64 800; CHECK-NEXT: [[TMP156:%.*]] = mul i64 [[TMP154]], [[TMP155]] 801; CHECK-NEXT: [[TMP157:%.*]] = trunc i64 [[TMP156]] to i32 802; CHECK-NEXT: [[TMP158:%.*]] = lshr i64 [[TMP156]], 32 803; CHECK-NEXT: [[TMP159:%.*]] = trunc i64 [[TMP158]] to i32 804; CHECK-NEXT: [[TMP160:%.*]] = mul i32 [[TMP159]], [[TMP131]] 805; CHECK-NEXT: [[TMP161:%.*]] = sub i32 [[TMP130]], [[TMP160]] 806; CHECK-NEXT: [[TMP162:%.*]] = icmp uge i32 [[TMP161]], [[TMP131]] 807; CHECK-NEXT: [[TMP163:%.*]] = select i1 [[TMP162]], i32 -1, i32 
0 808; CHECK-NEXT: [[TMP164:%.*]] = icmp uge i32 [[TMP130]], [[TMP160]] 809; CHECK-NEXT: [[TMP165:%.*]] = select i1 [[TMP164]], i32 -1, i32 0 810; CHECK-NEXT: [[TMP166:%.*]] = and i32 [[TMP163]], [[TMP165]] 811; CHECK-NEXT: [[TMP167:%.*]] = icmp eq i32 [[TMP166]], 0 812; CHECK-NEXT: [[TMP168:%.*]] = sub i32 [[TMP161]], [[TMP131]] 813; CHECK-NEXT: [[TMP169:%.*]] = add i32 [[TMP161]], [[TMP131]] 814; CHECK-NEXT: [[TMP170:%.*]] = select i1 [[TMP167]], i32 [[TMP161]], i32 [[TMP168]] 815; CHECK-NEXT: [[TMP171:%.*]] = select i1 [[TMP164]], i32 [[TMP170]], i32 [[TMP169]] 816; CHECK-NEXT: [[TMP172:%.*]] = insertelement <4 x i32> [[TMP129]], i32 [[TMP171]], i64 3 817; CHECK-NEXT: store <4 x i32> [[TMP172]], <4 x i32> addrspace(1)* [[OUT:%.*]] 818; CHECK-NEXT: ret void 819; 820 %r = urem <4 x i32> %x, %y 821 store <4 x i32> %r, <4 x i32> addrspace(1)* %out 822 ret void 823} 824 825define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { 826; CHECK-LABEL: @sdiv_v4i32( 827; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 0 828; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 0 829; CHECK-NEXT: [[TMP3:%.*]] = ashr i32 [[TMP1]], 31 830; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP2]], 31 831; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] 832; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP1]], [[TMP3]] 833; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP2]], [[TMP4]] 834; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP3]] 835; CHECK-NEXT: [[TMP9:%.*]] = xor i32 [[TMP7]], [[TMP4]] 836; CHECK-NEXT: [[TMP10:%.*]] = uitofp i32 [[TMP9]] to float 837; CHECK-NEXT: [[TMP11:%.*]] = fdiv fast float 1.000000e+00, [[TMP10]] 838; CHECK-NEXT: [[TMP12:%.*]] = fmul fast float [[TMP11]], 0x41F0000000000000 839; CHECK-NEXT: [[TMP13:%.*]] = fptoui float [[TMP12]] to i32 840; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 841; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP9]] to i64 842; CHECK-NEXT: [[TMP16:%.*]] = mul i64 
[[TMP14]], [[TMP15]] 843; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[TMP16]] to i32 844; CHECK-NEXT: [[TMP18:%.*]] = lshr i64 [[TMP16]], 32 845; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 846; CHECK-NEXT: [[TMP20:%.*]] = sub i32 0, [[TMP17]] 847; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP19]], 0 848; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP17]] 849; CHECK-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 850; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP13]] to i64 851; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP23]], [[TMP24]] 852; CHECK-NEXT: [[TMP26:%.*]] = trunc i64 [[TMP25]] to i32 853; CHECK-NEXT: [[TMP27:%.*]] = lshr i64 [[TMP25]], 32 854; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 855; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[TMP13]], [[TMP28]] 856; CHECK-NEXT: [[TMP30:%.*]] = sub i32 [[TMP13]], [[TMP28]] 857; CHECK-NEXT: [[TMP31:%.*]] = select i1 [[TMP21]], i32 [[TMP29]], i32 [[TMP30]] 858; CHECK-NEXT: [[TMP32:%.*]] = zext i32 [[TMP31]] to i64 859; CHECK-NEXT: [[TMP33:%.*]] = zext i32 [[TMP8]] to i64 860; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP32]], [[TMP33]] 861; CHECK-NEXT: [[TMP35:%.*]] = trunc i64 [[TMP34]] to i32 862; CHECK-NEXT: [[TMP36:%.*]] = lshr i64 [[TMP34]], 32 863; CHECK-NEXT: [[TMP37:%.*]] = trunc i64 [[TMP36]] to i32 864; CHECK-NEXT: [[TMP38:%.*]] = mul i32 [[TMP37]], [[TMP9]] 865; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP8]], [[TMP38]] 866; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i32 [[TMP39]], [[TMP9]] 867; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 -1, i32 0 868; CHECK-NEXT: [[TMP42:%.*]] = icmp uge i32 [[TMP8]], [[TMP38]] 869; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 -1, i32 0 870; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP41]], [[TMP43]] 871; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[TMP44]], 0 872; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP37]], 1 873; CHECK-NEXT: [[TMP47:%.*]] = sub i32 [[TMP37]], 1 874; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP45]], i32 [[TMP37]], 
i32 [[TMP46]] 875; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP42]], i32 [[TMP48]], i32 [[TMP47]] 876; CHECK-NEXT: [[TMP50:%.*]] = xor i32 [[TMP49]], [[TMP5]] 877; CHECK-NEXT: [[TMP51:%.*]] = sub i32 [[TMP50]], [[TMP5]] 878; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i32> undef, i32 [[TMP51]], i64 0 879; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[X]], i64 1 880; CHECK-NEXT: [[TMP54:%.*]] = extractelement <4 x i32> [[Y]], i64 1 881; CHECK-NEXT: [[TMP55:%.*]] = ashr i32 [[TMP53]], 31 882; CHECK-NEXT: [[TMP56:%.*]] = ashr i32 [[TMP54]], 31 883; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]] 884; CHECK-NEXT: [[TMP58:%.*]] = add i32 [[TMP53]], [[TMP55]] 885; CHECK-NEXT: [[TMP59:%.*]] = add i32 [[TMP54]], [[TMP56]] 886; CHECK-NEXT: [[TMP60:%.*]] = xor i32 [[TMP58]], [[TMP55]] 887; CHECK-NEXT: [[TMP61:%.*]] = xor i32 [[TMP59]], [[TMP56]] 888; CHECK-NEXT: [[TMP62:%.*]] = uitofp i32 [[TMP61]] to float 889; CHECK-NEXT: [[TMP63:%.*]] = fdiv fast float 1.000000e+00, [[TMP62]] 890; CHECK-NEXT: [[TMP64:%.*]] = fmul fast float [[TMP63]], 0x41F0000000000000 891; CHECK-NEXT: [[TMP65:%.*]] = fptoui float [[TMP64]] to i32 892; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[TMP65]] to i64 893; CHECK-NEXT: [[TMP67:%.*]] = zext i32 [[TMP61]] to i64 894; CHECK-NEXT: [[TMP68:%.*]] = mul i64 [[TMP66]], [[TMP67]] 895; CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[TMP68]] to i32 896; CHECK-NEXT: [[TMP70:%.*]] = lshr i64 [[TMP68]], 32 897; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 898; CHECK-NEXT: [[TMP72:%.*]] = sub i32 0, [[TMP69]] 899; CHECK-NEXT: [[TMP73:%.*]] = icmp eq i32 [[TMP71]], 0 900; CHECK-NEXT: [[TMP74:%.*]] = select i1 [[TMP73]], i32 [[TMP72]], i32 [[TMP69]] 901; CHECK-NEXT: [[TMP75:%.*]] = zext i32 [[TMP74]] to i64 902; CHECK-NEXT: [[TMP76:%.*]] = zext i32 [[TMP65]] to i64 903; CHECK-NEXT: [[TMP77:%.*]] = mul i64 [[TMP75]], [[TMP76]] 904; CHECK-NEXT: [[TMP78:%.*]] = trunc i64 [[TMP77]] to i32 905; CHECK-NEXT: [[TMP79:%.*]] = lshr i64 [[TMP77]], 32 906; 
CHECK-NEXT: [[TMP80:%.*]] = trunc i64 [[TMP79]] to i32 907; CHECK-NEXT: [[TMP81:%.*]] = add i32 [[TMP65]], [[TMP80]] 908; CHECK-NEXT: [[TMP82:%.*]] = sub i32 [[TMP65]], [[TMP80]] 909; CHECK-NEXT: [[TMP83:%.*]] = select i1 [[TMP73]], i32 [[TMP81]], i32 [[TMP82]] 910; CHECK-NEXT: [[TMP84:%.*]] = zext i32 [[TMP83]] to i64 911; CHECK-NEXT: [[TMP85:%.*]] = zext i32 [[TMP60]] to i64 912; CHECK-NEXT: [[TMP86:%.*]] = mul i64 [[TMP84]], [[TMP85]] 913; CHECK-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i32 914; CHECK-NEXT: [[TMP88:%.*]] = lshr i64 [[TMP86]], 32 915; CHECK-NEXT: [[TMP89:%.*]] = trunc i64 [[TMP88]] to i32 916; CHECK-NEXT: [[TMP90:%.*]] = mul i32 [[TMP89]], [[TMP61]] 917; CHECK-NEXT: [[TMP91:%.*]] = sub i32 [[TMP60]], [[TMP90]] 918; CHECK-NEXT: [[TMP92:%.*]] = icmp uge i32 [[TMP91]], [[TMP61]] 919; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 -1, i32 0 920; CHECK-NEXT: [[TMP94:%.*]] = icmp uge i32 [[TMP60]], [[TMP90]] 921; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 -1, i32 0 922; CHECK-NEXT: [[TMP96:%.*]] = and i32 [[TMP93]], [[TMP95]] 923; CHECK-NEXT: [[TMP97:%.*]] = icmp eq i32 [[TMP96]], 0 924; CHECK-NEXT: [[TMP98:%.*]] = add i32 [[TMP89]], 1 925; CHECK-NEXT: [[TMP99:%.*]] = sub i32 [[TMP89]], 1 926; CHECK-NEXT: [[TMP100:%.*]] = select i1 [[TMP97]], i32 [[TMP89]], i32 [[TMP98]] 927; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP94]], i32 [[TMP100]], i32 [[TMP99]] 928; CHECK-NEXT: [[TMP102:%.*]] = xor i32 [[TMP101]], [[TMP57]] 929; CHECK-NEXT: [[TMP103:%.*]] = sub i32 [[TMP102]], [[TMP57]] 930; CHECK-NEXT: [[TMP104:%.*]] = insertelement <4 x i32> [[TMP52]], i32 [[TMP103]], i64 1 931; CHECK-NEXT: [[TMP105:%.*]] = extractelement <4 x i32> [[X]], i64 2 932; CHECK-NEXT: [[TMP106:%.*]] = extractelement <4 x i32> [[Y]], i64 2 933; CHECK-NEXT: [[TMP107:%.*]] = ashr i32 [[TMP105]], 31 934; CHECK-NEXT: [[TMP108:%.*]] = ashr i32 [[TMP106]], 31 935; CHECK-NEXT: [[TMP109:%.*]] = xor i32 [[TMP107]], [[TMP108]] 936; CHECK-NEXT: [[TMP110:%.*]] = add i32 
[[TMP105]], [[TMP107]] 937; CHECK-NEXT: [[TMP111:%.*]] = add i32 [[TMP106]], [[TMP108]] 938; CHECK-NEXT: [[TMP112:%.*]] = xor i32 [[TMP110]], [[TMP107]] 939; CHECK-NEXT: [[TMP113:%.*]] = xor i32 [[TMP111]], [[TMP108]] 940; CHECK-NEXT: [[TMP114:%.*]] = uitofp i32 [[TMP113]] to float 941; CHECK-NEXT: [[TMP115:%.*]] = fdiv fast float 1.000000e+00, [[TMP114]] 942; CHECK-NEXT: [[TMP116:%.*]] = fmul fast float [[TMP115]], 0x41F0000000000000 943; CHECK-NEXT: [[TMP117:%.*]] = fptoui float [[TMP116]] to i32 944; CHECK-NEXT: [[TMP118:%.*]] = zext i32 [[TMP117]] to i64 945; CHECK-NEXT: [[TMP119:%.*]] = zext i32 [[TMP113]] to i64 946; CHECK-NEXT: [[TMP120:%.*]] = mul i64 [[TMP118]], [[TMP119]] 947; CHECK-NEXT: [[TMP121:%.*]] = trunc i64 [[TMP120]] to i32 948; CHECK-NEXT: [[TMP122:%.*]] = lshr i64 [[TMP120]], 32 949; CHECK-NEXT: [[TMP123:%.*]] = trunc i64 [[TMP122]] to i32 950; CHECK-NEXT: [[TMP124:%.*]] = sub i32 0, [[TMP121]] 951; CHECK-NEXT: [[TMP125:%.*]] = icmp eq i32 [[TMP123]], 0 952; CHECK-NEXT: [[TMP126:%.*]] = select i1 [[TMP125]], i32 [[TMP124]], i32 [[TMP121]] 953; CHECK-NEXT: [[TMP127:%.*]] = zext i32 [[TMP126]] to i64 954; CHECK-NEXT: [[TMP128:%.*]] = zext i32 [[TMP117]] to i64 955; CHECK-NEXT: [[TMP129:%.*]] = mul i64 [[TMP127]], [[TMP128]] 956; CHECK-NEXT: [[TMP130:%.*]] = trunc i64 [[TMP129]] to i32 957; CHECK-NEXT: [[TMP131:%.*]] = lshr i64 [[TMP129]], 32 958; CHECK-NEXT: [[TMP132:%.*]] = trunc i64 [[TMP131]] to i32 959; CHECK-NEXT: [[TMP133:%.*]] = add i32 [[TMP117]], [[TMP132]] 960; CHECK-NEXT: [[TMP134:%.*]] = sub i32 [[TMP117]], [[TMP132]] 961; CHECK-NEXT: [[TMP135:%.*]] = select i1 [[TMP125]], i32 [[TMP133]], i32 [[TMP134]] 962; CHECK-NEXT: [[TMP136:%.*]] = zext i32 [[TMP135]] to i64 963; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP112]] to i64 964; CHECK-NEXT: [[TMP138:%.*]] = mul i64 [[TMP136]], [[TMP137]] 965; CHECK-NEXT: [[TMP139:%.*]] = trunc i64 [[TMP138]] to i32 966; CHECK-NEXT: [[TMP140:%.*]] = lshr i64 [[TMP138]], 32 967; CHECK-NEXT: 
[[TMP141:%.*]] = trunc i64 [[TMP140]] to i32 968; CHECK-NEXT: [[TMP142:%.*]] = mul i32 [[TMP141]], [[TMP113]] 969; CHECK-NEXT: [[TMP143:%.*]] = sub i32 [[TMP112]], [[TMP142]] 970; CHECK-NEXT: [[TMP144:%.*]] = icmp uge i32 [[TMP143]], [[TMP113]] 971; CHECK-NEXT: [[TMP145:%.*]] = select i1 [[TMP144]], i32 -1, i32 0 972; CHECK-NEXT: [[TMP146:%.*]] = icmp uge i32 [[TMP112]], [[TMP142]] 973; CHECK-NEXT: [[TMP147:%.*]] = select i1 [[TMP146]], i32 -1, i32 0 974; CHECK-NEXT: [[TMP148:%.*]] = and i32 [[TMP145]], [[TMP147]] 975; CHECK-NEXT: [[TMP149:%.*]] = icmp eq i32 [[TMP148]], 0 976; CHECK-NEXT: [[TMP150:%.*]] = add i32 [[TMP141]], 1 977; CHECK-NEXT: [[TMP151:%.*]] = sub i32 [[TMP141]], 1 978; CHECK-NEXT: [[TMP152:%.*]] = select i1 [[TMP149]], i32 [[TMP141]], i32 [[TMP150]] 979; CHECK-NEXT: [[TMP153:%.*]] = select i1 [[TMP146]], i32 [[TMP152]], i32 [[TMP151]] 980; CHECK-NEXT: [[TMP154:%.*]] = xor i32 [[TMP153]], [[TMP109]] 981; CHECK-NEXT: [[TMP155:%.*]] = sub i32 [[TMP154]], [[TMP109]] 982; CHECK-NEXT: [[TMP156:%.*]] = insertelement <4 x i32> [[TMP104]], i32 [[TMP155]], i64 2 983; CHECK-NEXT: [[TMP157:%.*]] = extractelement <4 x i32> [[X]], i64 3 984; CHECK-NEXT: [[TMP158:%.*]] = extractelement <4 x i32> [[Y]], i64 3 985; CHECK-NEXT: [[TMP159:%.*]] = ashr i32 [[TMP157]], 31 986; CHECK-NEXT: [[TMP160:%.*]] = ashr i32 [[TMP158]], 31 987; CHECK-NEXT: [[TMP161:%.*]] = xor i32 [[TMP159]], [[TMP160]] 988; CHECK-NEXT: [[TMP162:%.*]] = add i32 [[TMP157]], [[TMP159]] 989; CHECK-NEXT: [[TMP163:%.*]] = add i32 [[TMP158]], [[TMP160]] 990; CHECK-NEXT: [[TMP164:%.*]] = xor i32 [[TMP162]], [[TMP159]] 991; CHECK-NEXT: [[TMP165:%.*]] = xor i32 [[TMP163]], [[TMP160]] 992; CHECK-NEXT: [[TMP166:%.*]] = uitofp i32 [[TMP165]] to float 993; CHECK-NEXT: [[TMP167:%.*]] = fdiv fast float 1.000000e+00, [[TMP166]] 994; CHECK-NEXT: [[TMP168:%.*]] = fmul fast float [[TMP167]], 0x41F0000000000000 995; CHECK-NEXT: [[TMP169:%.*]] = fptoui float [[TMP168]] to i32 996; CHECK-NEXT: [[TMP170:%.*]] = zext 
i32 [[TMP169]] to i64 997; CHECK-NEXT: [[TMP171:%.*]] = zext i32 [[TMP165]] to i64 998; CHECK-NEXT: [[TMP172:%.*]] = mul i64 [[TMP170]], [[TMP171]] 999; CHECK-NEXT: [[TMP173:%.*]] = trunc i64 [[TMP172]] to i32 1000; CHECK-NEXT: [[TMP174:%.*]] = lshr i64 [[TMP172]], 32 1001; CHECK-NEXT: [[TMP175:%.*]] = trunc i64 [[TMP174]] to i32 1002; CHECK-NEXT: [[TMP176:%.*]] = sub i32 0, [[TMP173]] 1003; CHECK-NEXT: [[TMP177:%.*]] = icmp eq i32 [[TMP175]], 0 1004; CHECK-NEXT: [[TMP178:%.*]] = select i1 [[TMP177]], i32 [[TMP176]], i32 [[TMP173]] 1005; CHECK-NEXT: [[TMP179:%.*]] = zext i32 [[TMP178]] to i64 1006; CHECK-NEXT: [[TMP180:%.*]] = zext i32 [[TMP169]] to i64 1007; CHECK-NEXT: [[TMP181:%.*]] = mul i64 [[TMP179]], [[TMP180]] 1008; CHECK-NEXT: [[TMP182:%.*]] = trunc i64 [[TMP181]] to i32 1009; CHECK-NEXT: [[TMP183:%.*]] = lshr i64 [[TMP181]], 32 1010; CHECK-NEXT: [[TMP184:%.*]] = trunc i64 [[TMP183]] to i32 1011; CHECK-NEXT: [[TMP185:%.*]] = add i32 [[TMP169]], [[TMP184]] 1012; CHECK-NEXT: [[TMP186:%.*]] = sub i32 [[TMP169]], [[TMP184]] 1013; CHECK-NEXT: [[TMP187:%.*]] = select i1 [[TMP177]], i32 [[TMP185]], i32 [[TMP186]] 1014; CHECK-NEXT: [[TMP188:%.*]] = zext i32 [[TMP187]] to i64 1015; CHECK-NEXT: [[TMP189:%.*]] = zext i32 [[TMP164]] to i64 1016; CHECK-NEXT: [[TMP190:%.*]] = mul i64 [[TMP188]], [[TMP189]] 1017; CHECK-NEXT: [[TMP191:%.*]] = trunc i64 [[TMP190]] to i32 1018; CHECK-NEXT: [[TMP192:%.*]] = lshr i64 [[TMP190]], 32 1019; CHECK-NEXT: [[TMP193:%.*]] = trunc i64 [[TMP192]] to i32 1020; CHECK-NEXT: [[TMP194:%.*]] = mul i32 [[TMP193]], [[TMP165]] 1021; CHECK-NEXT: [[TMP195:%.*]] = sub i32 [[TMP164]], [[TMP194]] 1022; CHECK-NEXT: [[TMP196:%.*]] = icmp uge i32 [[TMP195]], [[TMP165]] 1023; CHECK-NEXT: [[TMP197:%.*]] = select i1 [[TMP196]], i32 -1, i32 0 1024; CHECK-NEXT: [[TMP198:%.*]] = icmp uge i32 [[TMP164]], [[TMP194]] 1025; CHECK-NEXT: [[TMP199:%.*]] = select i1 [[TMP198]], i32 -1, i32 0 1026; CHECK-NEXT: [[TMP200:%.*]] = and i32 [[TMP197]], [[TMP199]] 1027; 
CHECK-NEXT: [[TMP201:%.*]] = icmp eq i32 [[TMP200]], 0 1028; CHECK-NEXT: [[TMP202:%.*]] = add i32 [[TMP193]], 1 1029; CHECK-NEXT: [[TMP203:%.*]] = sub i32 [[TMP193]], 1 1030; CHECK-NEXT: [[TMP204:%.*]] = select i1 [[TMP201]], i32 [[TMP193]], i32 [[TMP202]] 1031; CHECK-NEXT: [[TMP205:%.*]] = select i1 [[TMP198]], i32 [[TMP204]], i32 [[TMP203]] 1032; CHECK-NEXT: [[TMP206:%.*]] = xor i32 [[TMP205]], [[TMP161]] 1033; CHECK-NEXT: [[TMP207:%.*]] = sub i32 [[TMP206]], [[TMP161]] 1034; CHECK-NEXT: [[TMP208:%.*]] = insertelement <4 x i32> [[TMP156]], i32 [[TMP207]], i64 3 1035; CHECK-NEXT: store <4 x i32> [[TMP208]], <4 x i32> addrspace(1)* [[OUT:%.*]] 1036; CHECK-NEXT: ret void 1037; 1038 %r = sdiv <4 x i32> %x, %y 1039 store <4 x i32> %r, <4 x i32> addrspace(1)* %out 1040 ret void 1041} 1042 1043define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { 1044; CHECK-LABEL: @srem_v4i32( 1045; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 0 1046; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 0 1047; CHECK-NEXT: [[TMP3:%.*]] = ashr i32 [[TMP1]], 31 1048; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP2]], 31 1049; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP1]], [[TMP3]] 1050; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP2]], [[TMP4]] 1051; CHECK-NEXT: [[TMP7:%.*]] = xor i32 [[TMP5]], [[TMP3]] 1052; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP6]], [[TMP4]] 1053; CHECK-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float 1054; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] 1055; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP10]], 0x41F0000000000000 1056; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP11]] to i32 1057; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 1058; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP8]] to i64 1059; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP13]], [[TMP14]] 1060; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32 1061; CHECK-NEXT: [[TMP17:%.*]] = lshr i64 
[[TMP15]], 32 1062; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 1063; CHECK-NEXT: [[TMP19:%.*]] = sub i32 0, [[TMP16]] 1064; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], 0 1065; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP16]] 1066; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 1067; CHECK-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64 1068; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP22]], [[TMP23]] 1069; CHECK-NEXT: [[TMP25:%.*]] = trunc i64 [[TMP24]] to i32 1070; CHECK-NEXT: [[TMP26:%.*]] = lshr i64 [[TMP24]], 32 1071; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[TMP26]] to i32 1072; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP12]], [[TMP27]] 1073; CHECK-NEXT: [[TMP29:%.*]] = sub i32 [[TMP12]], [[TMP27]] 1074; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP20]], i32 [[TMP28]], i32 [[TMP29]] 1075; CHECK-NEXT: [[TMP31:%.*]] = zext i32 [[TMP30]] to i64 1076; CHECK-NEXT: [[TMP32:%.*]] = zext i32 [[TMP7]] to i64 1077; CHECK-NEXT: [[TMP33:%.*]] = mul i64 [[TMP31]], [[TMP32]] 1078; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[TMP33]] to i32 1079; CHECK-NEXT: [[TMP35:%.*]] = lshr i64 [[TMP33]], 32 1080; CHECK-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i32 1081; CHECK-NEXT: [[TMP37:%.*]] = mul i32 [[TMP36]], [[TMP8]] 1082; CHECK-NEXT: [[TMP38:%.*]] = sub i32 [[TMP7]], [[TMP37]] 1083; CHECK-NEXT: [[TMP39:%.*]] = icmp uge i32 [[TMP38]], [[TMP8]] 1084; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 -1, i32 0 1085; CHECK-NEXT: [[TMP41:%.*]] = icmp uge i32 [[TMP7]], [[TMP37]] 1086; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 -1, i32 0 1087; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], [[TMP42]] 1088; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], 0 1089; CHECK-NEXT: [[TMP45:%.*]] = sub i32 [[TMP38]], [[TMP8]] 1090; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP38]], [[TMP8]] 1091; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP44]], i32 [[TMP38]], i32 [[TMP45]] 1092; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], i32 
[[TMP47]], i32 [[TMP46]] 1093; CHECK-NEXT: [[TMP49:%.*]] = xor i32 [[TMP48]], [[TMP3]] 1094; CHECK-NEXT: [[TMP50:%.*]] = sub i32 [[TMP49]], [[TMP3]] 1095; CHECK-NEXT: [[TMP51:%.*]] = insertelement <4 x i32> undef, i32 [[TMP50]], i64 0 1096; CHECK-NEXT: [[TMP52:%.*]] = extractelement <4 x i32> [[X]], i64 1 1097; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[Y]], i64 1 1098; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP52]], 31 1099; CHECK-NEXT: [[TMP55:%.*]] = ashr i32 [[TMP53]], 31 1100; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[TMP52]], [[TMP54]] 1101; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[TMP53]], [[TMP55]] 1102; CHECK-NEXT: [[TMP58:%.*]] = xor i32 [[TMP56]], [[TMP54]] 1103; CHECK-NEXT: [[TMP59:%.*]] = xor i32 [[TMP57]], [[TMP55]] 1104; CHECK-NEXT: [[TMP60:%.*]] = uitofp i32 [[TMP59]] to float 1105; CHECK-NEXT: [[TMP61:%.*]] = fdiv fast float 1.000000e+00, [[TMP60]] 1106; CHECK-NEXT: [[TMP62:%.*]] = fmul fast float [[TMP61]], 0x41F0000000000000 1107; CHECK-NEXT: [[TMP63:%.*]] = fptoui float [[TMP62]] to i32 1108; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[TMP63]] to i64 1109; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP59]] to i64 1110; CHECK-NEXT: [[TMP66:%.*]] = mul i64 [[TMP64]], [[TMP65]] 1111; CHECK-NEXT: [[TMP67:%.*]] = trunc i64 [[TMP66]] to i32 1112; CHECK-NEXT: [[TMP68:%.*]] = lshr i64 [[TMP66]], 32 1113; CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[TMP68]] to i32 1114; CHECK-NEXT: [[TMP70:%.*]] = sub i32 0, [[TMP67]] 1115; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[TMP69]], 0 1116; CHECK-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP70]], i32 [[TMP67]] 1117; CHECK-NEXT: [[TMP73:%.*]] = zext i32 [[TMP72]] to i64 1118; CHECK-NEXT: [[TMP74:%.*]] = zext i32 [[TMP63]] to i64 1119; CHECK-NEXT: [[TMP75:%.*]] = mul i64 [[TMP73]], [[TMP74]] 1120; CHECK-NEXT: [[TMP76:%.*]] = trunc i64 [[TMP75]] to i32 1121; CHECK-NEXT: [[TMP77:%.*]] = lshr i64 [[TMP75]], 32 1122; CHECK-NEXT: [[TMP78:%.*]] = trunc i64 [[TMP77]] to i32 1123; CHECK-NEXT: [[TMP79:%.*]] = add i32 
[[TMP63]], [[TMP78]] 1124; CHECK-NEXT: [[TMP80:%.*]] = sub i32 [[TMP63]], [[TMP78]] 1125; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP71]], i32 [[TMP79]], i32 [[TMP80]] 1126; CHECK-NEXT: [[TMP82:%.*]] = zext i32 [[TMP81]] to i64 1127; CHECK-NEXT: [[TMP83:%.*]] = zext i32 [[TMP58]] to i64 1128; CHECK-NEXT: [[TMP84:%.*]] = mul i64 [[TMP82]], [[TMP83]] 1129; CHECK-NEXT: [[TMP85:%.*]] = trunc i64 [[TMP84]] to i32 1130; CHECK-NEXT: [[TMP86:%.*]] = lshr i64 [[TMP84]], 32 1131; CHECK-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i32 1132; CHECK-NEXT: [[TMP88:%.*]] = mul i32 [[TMP87]], [[TMP59]] 1133; CHECK-NEXT: [[TMP89:%.*]] = sub i32 [[TMP58]], [[TMP88]] 1134; CHECK-NEXT: [[TMP90:%.*]] = icmp uge i32 [[TMP89]], [[TMP59]] 1135; CHECK-NEXT: [[TMP91:%.*]] = select i1 [[TMP90]], i32 -1, i32 0 1136; CHECK-NEXT: [[TMP92:%.*]] = icmp uge i32 [[TMP58]], [[TMP88]] 1137; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 -1, i32 0 1138; CHECK-NEXT: [[TMP94:%.*]] = and i32 [[TMP91]], [[TMP93]] 1139; CHECK-NEXT: [[TMP95:%.*]] = icmp eq i32 [[TMP94]], 0 1140; CHECK-NEXT: [[TMP96:%.*]] = sub i32 [[TMP89]], [[TMP59]] 1141; CHECK-NEXT: [[TMP97:%.*]] = add i32 [[TMP89]], [[TMP59]] 1142; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP95]], i32 [[TMP89]], i32 [[TMP96]] 1143; CHECK-NEXT: [[TMP99:%.*]] = select i1 [[TMP92]], i32 [[TMP98]], i32 [[TMP97]] 1144; CHECK-NEXT: [[TMP100:%.*]] = xor i32 [[TMP99]], [[TMP54]] 1145; CHECK-NEXT: [[TMP101:%.*]] = sub i32 [[TMP100]], [[TMP54]] 1146; CHECK-NEXT: [[TMP102:%.*]] = insertelement <4 x i32> [[TMP51]], i32 [[TMP101]], i64 1 1147; CHECK-NEXT: [[TMP103:%.*]] = extractelement <4 x i32> [[X]], i64 2 1148; CHECK-NEXT: [[TMP104:%.*]] = extractelement <4 x i32> [[Y]], i64 2 1149; CHECK-NEXT: [[TMP105:%.*]] = ashr i32 [[TMP103]], 31 1150; CHECK-NEXT: [[TMP106:%.*]] = ashr i32 [[TMP104]], 31 1151; CHECK-NEXT: [[TMP107:%.*]] = add i32 [[TMP103]], [[TMP105]] 1152; CHECK-NEXT: [[TMP108:%.*]] = add i32 [[TMP104]], [[TMP106]] 1153; CHECK-NEXT: [[TMP109:%.*]] = 
xor i32 [[TMP107]], [[TMP105]] 1154; CHECK-NEXT: [[TMP110:%.*]] = xor i32 [[TMP108]], [[TMP106]] 1155; CHECK-NEXT: [[TMP111:%.*]] = uitofp i32 [[TMP110]] to float 1156; CHECK-NEXT: [[TMP112:%.*]] = fdiv fast float 1.000000e+00, [[TMP111]] 1157; CHECK-NEXT: [[TMP113:%.*]] = fmul fast float [[TMP112]], 0x41F0000000000000 1158; CHECK-NEXT: [[TMP114:%.*]] = fptoui float [[TMP113]] to i32 1159; CHECK-NEXT: [[TMP115:%.*]] = zext i32 [[TMP114]] to i64 1160; CHECK-NEXT: [[TMP116:%.*]] = zext i32 [[TMP110]] to i64 1161; CHECK-NEXT: [[TMP117:%.*]] = mul i64 [[TMP115]], [[TMP116]] 1162; CHECK-NEXT: [[TMP118:%.*]] = trunc i64 [[TMP117]] to i32 1163; CHECK-NEXT: [[TMP119:%.*]] = lshr i64 [[TMP117]], 32 1164; CHECK-NEXT: [[TMP120:%.*]] = trunc i64 [[TMP119]] to i32 1165; CHECK-NEXT: [[TMP121:%.*]] = sub i32 0, [[TMP118]] 1166; CHECK-NEXT: [[TMP122:%.*]] = icmp eq i32 [[TMP120]], 0 1167; CHECK-NEXT: [[TMP123:%.*]] = select i1 [[TMP122]], i32 [[TMP121]], i32 [[TMP118]] 1168; CHECK-NEXT: [[TMP124:%.*]] = zext i32 [[TMP123]] to i64 1169; CHECK-NEXT: [[TMP125:%.*]] = zext i32 [[TMP114]] to i64 1170; CHECK-NEXT: [[TMP126:%.*]] = mul i64 [[TMP124]], [[TMP125]] 1171; CHECK-NEXT: [[TMP127:%.*]] = trunc i64 [[TMP126]] to i32 1172; CHECK-NEXT: [[TMP128:%.*]] = lshr i64 [[TMP126]], 32 1173; CHECK-NEXT: [[TMP129:%.*]] = trunc i64 [[TMP128]] to i32 1174; CHECK-NEXT: [[TMP130:%.*]] = add i32 [[TMP114]], [[TMP129]] 1175; CHECK-NEXT: [[TMP131:%.*]] = sub i32 [[TMP114]], [[TMP129]] 1176; CHECK-NEXT: [[TMP132:%.*]] = select i1 [[TMP122]], i32 [[TMP130]], i32 [[TMP131]] 1177; CHECK-NEXT: [[TMP133:%.*]] = zext i32 [[TMP132]] to i64 1178; CHECK-NEXT: [[TMP134:%.*]] = zext i32 [[TMP109]] to i64 1179; CHECK-NEXT: [[TMP135:%.*]] = mul i64 [[TMP133]], [[TMP134]] 1180; CHECK-NEXT: [[TMP136:%.*]] = trunc i64 [[TMP135]] to i32 1181; CHECK-NEXT: [[TMP137:%.*]] = lshr i64 [[TMP135]], 32 1182; CHECK-NEXT: [[TMP138:%.*]] = trunc i64 [[TMP137]] to i32 1183; CHECK-NEXT: [[TMP139:%.*]] = mul i32 [[TMP138]], 
[[TMP110]] 1184; CHECK-NEXT: [[TMP140:%.*]] = sub i32 [[TMP109]], [[TMP139]] 1185; CHECK-NEXT: [[TMP141:%.*]] = icmp uge i32 [[TMP140]], [[TMP110]] 1186; CHECK-NEXT: [[TMP142:%.*]] = select i1 [[TMP141]], i32 -1, i32 0 1187; CHECK-NEXT: [[TMP143:%.*]] = icmp uge i32 [[TMP109]], [[TMP139]] 1188; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP143]], i32 -1, i32 0 1189; CHECK-NEXT: [[TMP145:%.*]] = and i32 [[TMP142]], [[TMP144]] 1190; CHECK-NEXT: [[TMP146:%.*]] = icmp eq i32 [[TMP145]], 0 1191; CHECK-NEXT: [[TMP147:%.*]] = sub i32 [[TMP140]], [[TMP110]] 1192; CHECK-NEXT: [[TMP148:%.*]] = add i32 [[TMP140]], [[TMP110]] 1193; CHECK-NEXT: [[TMP149:%.*]] = select i1 [[TMP146]], i32 [[TMP140]], i32 [[TMP147]] 1194; CHECK-NEXT: [[TMP150:%.*]] = select i1 [[TMP143]], i32 [[TMP149]], i32 [[TMP148]] 1195; CHECK-NEXT: [[TMP151:%.*]] = xor i32 [[TMP150]], [[TMP105]] 1196; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP151]], [[TMP105]] 1197; CHECK-NEXT: [[TMP153:%.*]] = insertelement <4 x i32> [[TMP102]], i32 [[TMP152]], i64 2 1198; CHECK-NEXT: [[TMP154:%.*]] = extractelement <4 x i32> [[X]], i64 3 1199; CHECK-NEXT: [[TMP155:%.*]] = extractelement <4 x i32> [[Y]], i64 3 1200; CHECK-NEXT: [[TMP156:%.*]] = ashr i32 [[TMP154]], 31 1201; CHECK-NEXT: [[TMP157:%.*]] = ashr i32 [[TMP155]], 31 1202; CHECK-NEXT: [[TMP158:%.*]] = add i32 [[TMP154]], [[TMP156]] 1203; CHECK-NEXT: [[TMP159:%.*]] = add i32 [[TMP155]], [[TMP157]] 1204; CHECK-NEXT: [[TMP160:%.*]] = xor i32 [[TMP158]], [[TMP156]] 1205; CHECK-NEXT: [[TMP161:%.*]] = xor i32 [[TMP159]], [[TMP157]] 1206; CHECK-NEXT: [[TMP162:%.*]] = uitofp i32 [[TMP161]] to float 1207; CHECK-NEXT: [[TMP163:%.*]] = fdiv fast float 1.000000e+00, [[TMP162]] 1208; CHECK-NEXT: [[TMP164:%.*]] = fmul fast float [[TMP163]], 0x41F0000000000000 1209; CHECK-NEXT: [[TMP165:%.*]] = fptoui float [[TMP164]] to i32 1210; CHECK-NEXT: [[TMP166:%.*]] = zext i32 [[TMP165]] to i64 1211; CHECK-NEXT: [[TMP167:%.*]] = zext i32 [[TMP161]] to i64 1212; CHECK-NEXT: [[TMP168:%.*]] = 
mul i64 [[TMP166]], [[TMP167]] 1213; CHECK-NEXT: [[TMP169:%.*]] = trunc i64 [[TMP168]] to i32 1214; CHECK-NEXT: [[TMP170:%.*]] = lshr i64 [[TMP168]], 32 1215; CHECK-NEXT: [[TMP171:%.*]] = trunc i64 [[TMP170]] to i32 1216; CHECK-NEXT: [[TMP172:%.*]] = sub i32 0, [[TMP169]] 1217; CHECK-NEXT: [[TMP173:%.*]] = icmp eq i32 [[TMP171]], 0 1218; CHECK-NEXT: [[TMP174:%.*]] = select i1 [[TMP173]], i32 [[TMP172]], i32 [[TMP169]] 1219; CHECK-NEXT: [[TMP175:%.*]] = zext i32 [[TMP174]] to i64 1220; CHECK-NEXT: [[TMP176:%.*]] = zext i32 [[TMP165]] to i64 1221; CHECK-NEXT: [[TMP177:%.*]] = mul i64 [[TMP175]], [[TMP176]] 1222; CHECK-NEXT: [[TMP178:%.*]] = trunc i64 [[TMP177]] to i32 1223; CHECK-NEXT: [[TMP179:%.*]] = lshr i64 [[TMP177]], 32 1224; CHECK-NEXT: [[TMP180:%.*]] = trunc i64 [[TMP179]] to i32 1225; CHECK-NEXT: [[TMP181:%.*]] = add i32 [[TMP165]], [[TMP180]] 1226; CHECK-NEXT: [[TMP182:%.*]] = sub i32 [[TMP165]], [[TMP180]] 1227; CHECK-NEXT: [[TMP183:%.*]] = select i1 [[TMP173]], i32 [[TMP181]], i32 [[TMP182]] 1228; CHECK-NEXT: [[TMP184:%.*]] = zext i32 [[TMP183]] to i64 1229; CHECK-NEXT: [[TMP185:%.*]] = zext i32 [[TMP160]] to i64 1230; CHECK-NEXT: [[TMP186:%.*]] = mul i64 [[TMP184]], [[TMP185]] 1231; CHECK-NEXT: [[TMP187:%.*]] = trunc i64 [[TMP186]] to i32 1232; CHECK-NEXT: [[TMP188:%.*]] = lshr i64 [[TMP186]], 32 1233; CHECK-NEXT: [[TMP189:%.*]] = trunc i64 [[TMP188]] to i32 1234; CHECK-NEXT: [[TMP190:%.*]] = mul i32 [[TMP189]], [[TMP161]] 1235; CHECK-NEXT: [[TMP191:%.*]] = sub i32 [[TMP160]], [[TMP190]] 1236; CHECK-NEXT: [[TMP192:%.*]] = icmp uge i32 [[TMP191]], [[TMP161]] 1237; CHECK-NEXT: [[TMP193:%.*]] = select i1 [[TMP192]], i32 -1, i32 0 1238; CHECK-NEXT: [[TMP194:%.*]] = icmp uge i32 [[TMP160]], [[TMP190]] 1239; CHECK-NEXT: [[TMP195:%.*]] = select i1 [[TMP194]], i32 -1, i32 0 1240; CHECK-NEXT: [[TMP196:%.*]] = and i32 [[TMP193]], [[TMP195]] 1241; CHECK-NEXT: [[TMP197:%.*]] = icmp eq i32 [[TMP196]], 0 1242; CHECK-NEXT: [[TMP198:%.*]] = sub i32 [[TMP191]], 
[[TMP161]] 1243; CHECK-NEXT: [[TMP199:%.*]] = add i32 [[TMP191]], [[TMP161]] 1244; CHECK-NEXT: [[TMP200:%.*]] = select i1 [[TMP197]], i32 [[TMP191]], i32 [[TMP198]] 1245; CHECK-NEXT: [[TMP201:%.*]] = select i1 [[TMP194]], i32 [[TMP200]], i32 [[TMP199]] 1246; CHECK-NEXT: [[TMP202:%.*]] = xor i32 [[TMP201]], [[TMP156]] 1247; CHECK-NEXT: [[TMP203:%.*]] = sub i32 [[TMP202]], [[TMP156]] 1248; CHECK-NEXT: [[TMP204:%.*]] = insertelement <4 x i32> [[TMP153]], i32 [[TMP203]], i64 3 1249; CHECK-NEXT: store <4 x i32> [[TMP204]], <4 x i32> addrspace(1)* [[OUT:%.*]] 1250; CHECK-NEXT: ret void 1251; 1252 %r = srem <4 x i32> %x, %y 1253 store <4 x i32> %r, <4 x i32> addrspace(1)* %out 1254 ret void 1255} 1256 1257define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) { 1258; CHECK-LABEL: @udiv_v4i16( 1259; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[X:%.*]], i64 0 1260; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[Y:%.*]], i64 0 1261; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32 1262; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32 1263; CHECK-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP3]] to float 1264; CHECK-NEXT: [[TMP6:%.*]] = uitofp i32 [[TMP4]] to float 1265; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] 1266; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] 1267; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) 1268; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] 1269; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) 1270; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 1271; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) 1272; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) 1273; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] 1274; CHECK-NEXT: [[TMP16:%.*]] = select i1 
[[TMP15]], i32 1, i32 0 1275; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]] 1276; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 65535 1277; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 1278; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> undef, i16 [[TMP19]], i64 0 1279; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i16> [[X]], i64 1 1280; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i16> [[Y]], i64 1 1281; CHECK-NEXT: [[TMP23:%.*]] = zext i16 [[TMP21]] to i32 1282; CHECK-NEXT: [[TMP24:%.*]] = zext i16 [[TMP22]] to i32 1283; CHECK-NEXT: [[TMP25:%.*]] = uitofp i32 [[TMP23]] to float 1284; CHECK-NEXT: [[TMP26:%.*]] = uitofp i32 [[TMP24]] to float 1285; CHECK-NEXT: [[TMP27:%.*]] = fdiv fast float 1.000000e+00, [[TMP26]] 1286; CHECK-NEXT: [[TMP28:%.*]] = fmul fast float [[TMP25]], [[TMP27]] 1287; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]]) 1288; CHECK-NEXT: [[TMP30:%.*]] = fsub fast float -0.000000e+00, [[TMP29]] 1289; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]]) 1290; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32 1291; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]]) 1292; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]]) 1293; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]] 1294; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 1, i32 0 1295; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP32]], [[TMP36]] 1296; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 65535 1297; CHECK-NEXT: [[TMP39:%.*]] = trunc i32 [[TMP38]] to i16 1298; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i16> [[TMP20]], i16 [[TMP39]], i64 1 1299; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i16> [[X]], i64 2 1300; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i16> [[Y]], i64 2 1301; CHECK-NEXT: [[TMP43:%.*]] = zext i16 [[TMP41]] to i32 1302; CHECK-NEXT: [[TMP44:%.*]] = 
zext i16 [[TMP42]] to i32 1303; CHECK-NEXT: [[TMP45:%.*]] = uitofp i32 [[TMP43]] to float 1304; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP44]] to float 1305; CHECK-NEXT: [[TMP47:%.*]] = fdiv fast float 1.000000e+00, [[TMP46]] 1306; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP45]], [[TMP47]] 1307; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]]) 1308; CHECK-NEXT: [[TMP50:%.*]] = fsub fast float -0.000000e+00, [[TMP49]] 1309; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]]) 1310; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32 1311; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]]) 1312; CHECK-NEXT: [[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]]) 1313; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]] 1314; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 1, i32 0 1315; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[TMP52]], [[TMP56]] 1316; CHECK-NEXT: [[TMP58:%.*]] = and i32 [[TMP57]], 65535 1317; CHECK-NEXT: [[TMP59:%.*]] = trunc i32 [[TMP58]] to i16 1318; CHECK-NEXT: [[TMP60:%.*]] = insertelement <4 x i16> [[TMP40]], i16 [[TMP59]], i64 2 1319; CHECK-NEXT: [[TMP61:%.*]] = extractelement <4 x i16> [[X]], i64 3 1320; CHECK-NEXT: [[TMP62:%.*]] = extractelement <4 x i16> [[Y]], i64 3 1321; CHECK-NEXT: [[TMP63:%.*]] = zext i16 [[TMP61]] to i32 1322; CHECK-NEXT: [[TMP64:%.*]] = zext i16 [[TMP62]] to i32 1323; CHECK-NEXT: [[TMP65:%.*]] = uitofp i32 [[TMP63]] to float 1324; CHECK-NEXT: [[TMP66:%.*]] = uitofp i32 [[TMP64]] to float 1325; CHECK-NEXT: [[TMP67:%.*]] = fdiv fast float 1.000000e+00, [[TMP66]] 1326; CHECK-NEXT: [[TMP68:%.*]] = fmul fast float [[TMP65]], [[TMP67]] 1327; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.trunc.f32(float [[TMP68]]) 1328; CHECK-NEXT: [[TMP70:%.*]] = fsub fast float -0.000000e+00, [[TMP69]] 1329; CHECK-NEXT: [[TMP71:%.*]] = call fast float 
@llvm.amdgcn.fmad.ftz.f32(float [[TMP70]], float [[TMP66]], float [[TMP65]]) 1330; CHECK-NEXT: [[TMP72:%.*]] = fptoui float [[TMP69]] to i32 1331; CHECK-NEXT: [[TMP73:%.*]] = call fast float @llvm.fabs.f32(float [[TMP71]]) 1332; CHECK-NEXT: [[TMP74:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]]) 1333; CHECK-NEXT: [[TMP75:%.*]] = fcmp fast oge float [[TMP73]], [[TMP74]] 1334; CHECK-NEXT: [[TMP76:%.*]] = select i1 [[TMP75]], i32 1, i32 0 1335; CHECK-NEXT: [[TMP77:%.*]] = add i32 [[TMP72]], [[TMP76]] 1336; CHECK-NEXT: [[TMP78:%.*]] = and i32 [[TMP77]], 65535 1337; CHECK-NEXT: [[TMP79:%.*]] = trunc i32 [[TMP78]] to i16 1338; CHECK-NEXT: [[TMP80:%.*]] = insertelement <4 x i16> [[TMP60]], i16 [[TMP79]], i64 3 1339; CHECK-NEXT: store <4 x i16> [[TMP80]], <4 x i16> addrspace(1)* [[OUT:%.*]] 1340; CHECK-NEXT: ret void 1341; 1342 %r = udiv <4 x i16> %x, %y 1343 store <4 x i16> %r, <4 x i16> addrspace(1)* %out 1344 ret void 1345} 1346 1347define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) { 1348; CHECK-LABEL: @urem_v4i16( 1349; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[X:%.*]], i64 0 1350; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[Y:%.*]], i64 0 1351; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32 1352; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32 1353; CHECK-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP3]] to float 1354; CHECK-NEXT: [[TMP6:%.*]] = uitofp i32 [[TMP4]] to float 1355; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] 1356; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] 1357; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) 1358; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] 1359; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) 1360; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 1361; CHECK-NEXT: [[TMP13:%.*]] = call fast 
float @llvm.fabs.f32(float [[TMP11]]) 1362; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) 1363; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] 1364; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 1, i32 0 1365; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]] 1366; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], [[TMP4]] 1367; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP3]], [[TMP18]] 1368; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 65535 1369; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 1370; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i16> undef, i16 [[TMP21]], i64 0 1371; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i16> [[X]], i64 1 1372; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i16> [[Y]], i64 1 1373; CHECK-NEXT: [[TMP25:%.*]] = zext i16 [[TMP23]] to i32 1374; CHECK-NEXT: [[TMP26:%.*]] = zext i16 [[TMP24]] to i32 1375; CHECK-NEXT: [[TMP27:%.*]] = uitofp i32 [[TMP25]] to float 1376; CHECK-NEXT: [[TMP28:%.*]] = uitofp i32 [[TMP26]] to float 1377; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast float 1.000000e+00, [[TMP28]] 1378; CHECK-NEXT: [[TMP30:%.*]] = fmul fast float [[TMP27]], [[TMP29]] 1379; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]]) 1380; CHECK-NEXT: [[TMP32:%.*]] = fsub fast float -0.000000e+00, [[TMP31]] 1381; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]]) 1382; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32 1383; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) 1384; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]]) 1385; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]] 1386; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 1, i32 0 1387; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP34]], [[TMP38]] 1388; CHECK-NEXT: [[TMP40:%.*]] = mul i32 [[TMP39]], [[TMP26]] 1389; CHECK-NEXT: 
[[TMP41:%.*]] = sub i32 [[TMP25]], [[TMP40]] 1390; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], 65535 1391; CHECK-NEXT: [[TMP43:%.*]] = trunc i32 [[TMP42]] to i16 1392; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i16> [[TMP22]], i16 [[TMP43]], i64 1 1393; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i16> [[X]], i64 2 1394; CHECK-NEXT: [[TMP46:%.*]] = extractelement <4 x i16> [[Y]], i64 2 1395; CHECK-NEXT: [[TMP47:%.*]] = zext i16 [[TMP45]] to i32 1396; CHECK-NEXT: [[TMP48:%.*]] = zext i16 [[TMP46]] to i32 1397; CHECK-NEXT: [[TMP49:%.*]] = uitofp i32 [[TMP47]] to float 1398; CHECK-NEXT: [[TMP50:%.*]] = uitofp i32 [[TMP48]] to float 1399; CHECK-NEXT: [[TMP51:%.*]] = fdiv fast float 1.000000e+00, [[TMP50]] 1400; CHECK-NEXT: [[TMP52:%.*]] = fmul fast float [[TMP49]], [[TMP51]] 1401; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]]) 1402; CHECK-NEXT: [[TMP54:%.*]] = fsub fast float -0.000000e+00, [[TMP53]] 1403; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]]) 1404; CHECK-NEXT: [[TMP56:%.*]] = fptoui float [[TMP53]] to i32 1405; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]]) 1406; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]]) 1407; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]] 1408; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 1, i32 0 1409; CHECK-NEXT: [[TMP61:%.*]] = add i32 [[TMP56]], [[TMP60]] 1410; CHECK-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], [[TMP48]] 1411; CHECK-NEXT: [[TMP63:%.*]] = sub i32 [[TMP47]], [[TMP62]] 1412; CHECK-NEXT: [[TMP64:%.*]] = and i32 [[TMP63]], 65535 1413; CHECK-NEXT: [[TMP65:%.*]] = trunc i32 [[TMP64]] to i16 1414; CHECK-NEXT: [[TMP66:%.*]] = insertelement <4 x i16> [[TMP44]], i16 [[TMP65]], i64 2 1415; CHECK-NEXT: [[TMP67:%.*]] = extractelement <4 x i16> [[X]], i64 3 1416; CHECK-NEXT: [[TMP68:%.*]] = extractelement <4 x i16> [[Y]], i64 3 1417; 
CHECK-NEXT: [[TMP69:%.*]] = zext i16 [[TMP67]] to i32 1418; CHECK-NEXT: [[TMP70:%.*]] = zext i16 [[TMP68]] to i32 1419; CHECK-NEXT: [[TMP71:%.*]] = uitofp i32 [[TMP69]] to float 1420; CHECK-NEXT: [[TMP72:%.*]] = uitofp i32 [[TMP70]] to float 1421; CHECK-NEXT: [[TMP73:%.*]] = fdiv fast float 1.000000e+00, [[TMP72]] 1422; CHECK-NEXT: [[TMP74:%.*]] = fmul fast float [[TMP71]], [[TMP73]] 1423; CHECK-NEXT: [[TMP75:%.*]] = call fast float @llvm.trunc.f32(float [[TMP74]]) 1424; CHECK-NEXT: [[TMP76:%.*]] = fsub fast float -0.000000e+00, [[TMP75]] 1425; CHECK-NEXT: [[TMP77:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP76]], float [[TMP72]], float [[TMP71]]) 1426; CHECK-NEXT: [[TMP78:%.*]] = fptoui float [[TMP75]] to i32 1427; CHECK-NEXT: [[TMP79:%.*]] = call fast float @llvm.fabs.f32(float [[TMP77]]) 1428; CHECK-NEXT: [[TMP80:%.*]] = call fast float @llvm.fabs.f32(float [[TMP72]]) 1429; CHECK-NEXT: [[TMP81:%.*]] = fcmp fast oge float [[TMP79]], [[TMP80]] 1430; CHECK-NEXT: [[TMP82:%.*]] = select i1 [[TMP81]], i32 1, i32 0 1431; CHECK-NEXT: [[TMP83:%.*]] = add i32 [[TMP78]], [[TMP82]] 1432; CHECK-NEXT: [[TMP84:%.*]] = mul i32 [[TMP83]], [[TMP70]] 1433; CHECK-NEXT: [[TMP85:%.*]] = sub i32 [[TMP69]], [[TMP84]] 1434; CHECK-NEXT: [[TMP86:%.*]] = and i32 [[TMP85]], 65535 1435; CHECK-NEXT: [[TMP87:%.*]] = trunc i32 [[TMP86]] to i16 1436; CHECK-NEXT: [[TMP88:%.*]] = insertelement <4 x i16> [[TMP66]], i16 [[TMP87]], i64 3 1437; CHECK-NEXT: store <4 x i16> [[TMP88]], <4 x i16> addrspace(1)* [[OUT:%.*]] 1438; CHECK-NEXT: ret void 1439; 1440 %r = urem <4 x i16> %x, %y 1441 store <4 x i16> %r, <4 x i16> addrspace(1)* %out 1442 ret void 1443} 1444 1445define amdgpu_kernel void @sdiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) { 1446; CHECK-LABEL: @sdiv_v4i16( 1447; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[X:%.*]], i64 0 1448; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[Y:%.*]], i64 0 1449; CHECK-NEXT: [[TMP3:%.*]] = sext i16 
[[TMP1]] to i32 1450; CHECK-NEXT: [[TMP4:%.*]] = sext i16 [[TMP2]] to i32 1451; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] 1452; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 1453; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 1454; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float 1455; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float 1456; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] 1457; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] 1458; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) 1459; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]] 1460; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) 1461; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 1462; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) 1463; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) 1464; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] 1465; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0 1466; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] 1467; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 1468; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32 1469; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 1470; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i16> undef, i16 [[TMP23]], i64 0 1471; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i16> [[X]], i64 1 1472; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i16> [[Y]], i64 1 1473; CHECK-NEXT: [[TMP27:%.*]] = sext i16 [[TMP25]] to i32 1474; CHECK-NEXT: [[TMP28:%.*]] = sext i16 [[TMP26]] to i32 1475; CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]] 1476; CHECK-NEXT: [[TMP30:%.*]] = ashr i32 [[TMP29]], 30 1477; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP30]], 1 1478; CHECK-NEXT: [[TMP32:%.*]] = sitofp i32 [[TMP27]] to 
float 1479; CHECK-NEXT: [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float 1480; CHECK-NEXT: [[TMP34:%.*]] = fdiv fast float 1.000000e+00, [[TMP33]] 1481; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]] 1482; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]]) 1483; CHECK-NEXT: [[TMP37:%.*]] = fsub fast float -0.000000e+00, [[TMP36]] 1484; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]]) 1485; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32 1486; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]]) 1487; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) 1488; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]] 1489; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP31]], i32 0 1490; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP39]], [[TMP43]] 1491; CHECK-NEXT: [[TMP45:%.*]] = trunc i32 [[TMP44]] to i16 1492; CHECK-NEXT: [[TMP46:%.*]] = sext i16 [[TMP45]] to i32 1493; CHECK-NEXT: [[TMP47:%.*]] = trunc i32 [[TMP46]] to i16 1494; CHECK-NEXT: [[TMP48:%.*]] = insertelement <4 x i16> [[TMP24]], i16 [[TMP47]], i64 1 1495; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i16> [[X]], i64 2 1496; CHECK-NEXT: [[TMP50:%.*]] = extractelement <4 x i16> [[Y]], i64 2 1497; CHECK-NEXT: [[TMP51:%.*]] = sext i16 [[TMP49]] to i32 1498; CHECK-NEXT: [[TMP52:%.*]] = sext i16 [[TMP50]] to i32 1499; CHECK-NEXT: [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]] 1500; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP53]], 30 1501; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP54]], 1 1502; CHECK-NEXT: [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float 1503; CHECK-NEXT: [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float 1504; CHECK-NEXT: [[TMP58:%.*]] = fdiv fast float 1.000000e+00, [[TMP57]] 1505; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]] 1506; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float 
[[TMP59]]) 1507; CHECK-NEXT: [[TMP61:%.*]] = fsub fast float -0.000000e+00, [[TMP60]] 1508; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]]) 1509; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32 1510; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]]) 1511; CHECK-NEXT: [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]]) 1512; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]] 1513; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP66]], i32 [[TMP55]], i32 0 1514; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[TMP63]], [[TMP67]] 1515; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP68]] to i16 1516; CHECK-NEXT: [[TMP70:%.*]] = sext i16 [[TMP69]] to i32 1517; CHECK-NEXT: [[TMP71:%.*]] = trunc i32 [[TMP70]] to i16 1518; CHECK-NEXT: [[TMP72:%.*]] = insertelement <4 x i16> [[TMP48]], i16 [[TMP71]], i64 2 1519; CHECK-NEXT: [[TMP73:%.*]] = extractelement <4 x i16> [[X]], i64 3 1520; CHECK-NEXT: [[TMP74:%.*]] = extractelement <4 x i16> [[Y]], i64 3 1521; CHECK-NEXT: [[TMP75:%.*]] = sext i16 [[TMP73]] to i32 1522; CHECK-NEXT: [[TMP76:%.*]] = sext i16 [[TMP74]] to i32 1523; CHECK-NEXT: [[TMP77:%.*]] = xor i32 [[TMP75]], [[TMP76]] 1524; CHECK-NEXT: [[TMP78:%.*]] = ashr i32 [[TMP77]], 30 1525; CHECK-NEXT: [[TMP79:%.*]] = or i32 [[TMP78]], 1 1526; CHECK-NEXT: [[TMP80:%.*]] = sitofp i32 [[TMP75]] to float 1527; CHECK-NEXT: [[TMP81:%.*]] = sitofp i32 [[TMP76]] to float 1528; CHECK-NEXT: [[TMP82:%.*]] = fdiv fast float 1.000000e+00, [[TMP81]] 1529; CHECK-NEXT: [[TMP83:%.*]] = fmul fast float [[TMP80]], [[TMP82]] 1530; CHECK-NEXT: [[TMP84:%.*]] = call fast float @llvm.trunc.f32(float [[TMP83]]) 1531; CHECK-NEXT: [[TMP85:%.*]] = fsub fast float -0.000000e+00, [[TMP84]] 1532; CHECK-NEXT: [[TMP86:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP85]], float [[TMP81]], float [[TMP80]]) 1533; CHECK-NEXT: [[TMP87:%.*]] = fptosi float [[TMP84]] to i32 1534; 
CHECK-NEXT: [[TMP88:%.*]] = call fast float @llvm.fabs.f32(float [[TMP86]]) 1535; CHECK-NEXT: [[TMP89:%.*]] = call fast float @llvm.fabs.f32(float [[TMP81]]) 1536; CHECK-NEXT: [[TMP90:%.*]] = fcmp fast oge float [[TMP88]], [[TMP89]] 1537; CHECK-NEXT: [[TMP91:%.*]] = select i1 [[TMP90]], i32 [[TMP79]], i32 0 1538; CHECK-NEXT: [[TMP92:%.*]] = add i32 [[TMP87]], [[TMP91]] 1539; CHECK-NEXT: [[TMP93:%.*]] = trunc i32 [[TMP92]] to i16 1540; CHECK-NEXT: [[TMP94:%.*]] = sext i16 [[TMP93]] to i32 1541; CHECK-NEXT: [[TMP95:%.*]] = trunc i32 [[TMP94]] to i16 1542; CHECK-NEXT: [[TMP96:%.*]] = insertelement <4 x i16> [[TMP72]], i16 [[TMP95]], i64 3 1543; CHECK-NEXT: store <4 x i16> [[TMP96]], <4 x i16> addrspace(1)* [[OUT:%.*]] 1544; CHECK-NEXT: ret void 1545; 1546 %r = sdiv <4 x i16> %x, %y 1547 store <4 x i16> %r, <4 x i16> addrspace(1)* %out 1548 ret void 1549} 1550 1551define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x, <4 x i16> %y) { 1552; CHECK-LABEL: @srem_v4i16( 1553; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[X:%.*]], i64 0 1554; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[Y:%.*]], i64 0 1555; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP1]] to i32 1556; CHECK-NEXT: [[TMP4:%.*]] = sext i16 [[TMP2]] to i32 1557; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] 1558; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 1559; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 1560; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float 1561; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float 1562; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] 1563; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] 1564; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) 1565; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]] 1566; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) 1567; 
CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 1568; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) 1569; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) 1570; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] 1571; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0 1572; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] 1573; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], [[TMP4]] 1574; CHECK-NEXT: [[TMP22:%.*]] = sub i32 [[TMP3]], [[TMP21]] 1575; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 1576; CHECK-NEXT: [[TMP24:%.*]] = sext i16 [[TMP23]] to i32 1577; CHECK-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 1578; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> undef, i16 [[TMP25]], i64 0 1579; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i16> [[X]], i64 1 1580; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i16> [[Y]], i64 1 1581; CHECK-NEXT: [[TMP29:%.*]] = sext i16 [[TMP27]] to i32 1582; CHECK-NEXT: [[TMP30:%.*]] = sext i16 [[TMP28]] to i32 1583; CHECK-NEXT: [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]] 1584; CHECK-NEXT: [[TMP32:%.*]] = ashr i32 [[TMP31]], 30 1585; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP32]], 1 1586; CHECK-NEXT: [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float 1587; CHECK-NEXT: [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float 1588; CHECK-NEXT: [[TMP36:%.*]] = fdiv fast float 1.000000e+00, [[TMP35]] 1589; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]] 1590; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]]) 1591; CHECK-NEXT: [[TMP39:%.*]] = fsub fast float -0.000000e+00, [[TMP38]] 1592; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]]) 1593; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32 1594; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]]) 1595; CHECK-NEXT: [[TMP43:%.*]] 
= call fast float @llvm.fabs.f32(float [[TMP35]]) 1596; CHECK-NEXT: [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]] 1597; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP33]], i32 0 1598; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP41]], [[TMP45]] 1599; CHECK-NEXT: [[TMP47:%.*]] = mul i32 [[TMP46]], [[TMP30]] 1600; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP29]], [[TMP47]] 1601; CHECK-NEXT: [[TMP49:%.*]] = trunc i32 [[TMP48]] to i16 1602; CHECK-NEXT: [[TMP50:%.*]] = sext i16 [[TMP49]] to i32 1603; CHECK-NEXT: [[TMP51:%.*]] = trunc i32 [[TMP50]] to i16 1604; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP51]], i64 1 1605; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i16> [[X]], i64 2 1606; CHECK-NEXT: [[TMP54:%.*]] = extractelement <4 x i16> [[Y]], i64 2 1607; CHECK-NEXT: [[TMP55:%.*]] = sext i16 [[TMP53]] to i32 1608; CHECK-NEXT: [[TMP56:%.*]] = sext i16 [[TMP54]] to i32 1609; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]] 1610; CHECK-NEXT: [[TMP58:%.*]] = ashr i32 [[TMP57]], 30 1611; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP58]], 1 1612; CHECK-NEXT: [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float 1613; CHECK-NEXT: [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float 1614; CHECK-NEXT: [[TMP62:%.*]] = fdiv fast float 1.000000e+00, [[TMP61]] 1615; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]] 1616; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]]) 1617; CHECK-NEXT: [[TMP65:%.*]] = fsub fast float -0.000000e+00, [[TMP64]] 1618; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]]) 1619; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32 1620; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]]) 1621; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]]) 1622; CHECK-NEXT: [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]] 1623; CHECK-NEXT: [[TMP71:%.*]] = 
select i1 [[TMP70]], i32 [[TMP59]], i32 0 1624; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[TMP67]], [[TMP71]] 1625; CHECK-NEXT: [[TMP73:%.*]] = mul i32 [[TMP72]], [[TMP56]] 1626; CHECK-NEXT: [[TMP74:%.*]] = sub i32 [[TMP55]], [[TMP73]] 1627; CHECK-NEXT: [[TMP75:%.*]] = trunc i32 [[TMP74]] to i16 1628; CHECK-NEXT: [[TMP76:%.*]] = sext i16 [[TMP75]] to i32 1629; CHECK-NEXT: [[TMP77:%.*]] = trunc i32 [[TMP76]] to i16 1630; CHECK-NEXT: [[TMP78:%.*]] = insertelement <4 x i16> [[TMP52]], i16 [[TMP77]], i64 2 1631; CHECK-NEXT: [[TMP79:%.*]] = extractelement <4 x i16> [[X]], i64 3 1632; CHECK-NEXT: [[TMP80:%.*]] = extractelement <4 x i16> [[Y]], i64 3 1633; CHECK-NEXT: [[TMP81:%.*]] = sext i16 [[TMP79]] to i32 1634; CHECK-NEXT: [[TMP82:%.*]] = sext i16 [[TMP80]] to i32 1635; CHECK-NEXT: [[TMP83:%.*]] = xor i32 [[TMP81]], [[TMP82]] 1636; CHECK-NEXT: [[TMP84:%.*]] = ashr i32 [[TMP83]], 30 1637; CHECK-NEXT: [[TMP85:%.*]] = or i32 [[TMP84]], 1 1638; CHECK-NEXT: [[TMP86:%.*]] = sitofp i32 [[TMP81]] to float 1639; CHECK-NEXT: [[TMP87:%.*]] = sitofp i32 [[TMP82]] to float 1640; CHECK-NEXT: [[TMP88:%.*]] = fdiv fast float 1.000000e+00, [[TMP87]] 1641; CHECK-NEXT: [[TMP89:%.*]] = fmul fast float [[TMP86]], [[TMP88]] 1642; CHECK-NEXT: [[TMP90:%.*]] = call fast float @llvm.trunc.f32(float [[TMP89]]) 1643; CHECK-NEXT: [[TMP91:%.*]] = fsub fast float -0.000000e+00, [[TMP90]] 1644; CHECK-NEXT: [[TMP92:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP91]], float [[TMP87]], float [[TMP86]]) 1645; CHECK-NEXT: [[TMP93:%.*]] = fptosi float [[TMP90]] to i32 1646; CHECK-NEXT: [[TMP94:%.*]] = call fast float @llvm.fabs.f32(float [[TMP92]]) 1647; CHECK-NEXT: [[TMP95:%.*]] = call fast float @llvm.fabs.f32(float [[TMP87]]) 1648; CHECK-NEXT: [[TMP96:%.*]] = fcmp fast oge float [[TMP94]], [[TMP95]] 1649; CHECK-NEXT: [[TMP97:%.*]] = select i1 [[TMP96]], i32 [[TMP85]], i32 0 1650; CHECK-NEXT: [[TMP98:%.*]] = add i32 [[TMP93]], [[TMP97]] 1651; CHECK-NEXT: [[TMP99:%.*]] = mul i32 [[TMP98]], 
[[TMP82]] 1652; CHECK-NEXT: [[TMP100:%.*]] = sub i32 [[TMP81]], [[TMP99]] 1653; CHECK-NEXT: [[TMP101:%.*]] = trunc i32 [[TMP100]] to i16 1654; CHECK-NEXT: [[TMP102:%.*]] = sext i16 [[TMP101]] to i32 1655; CHECK-NEXT: [[TMP103:%.*]] = trunc i32 [[TMP102]] to i16 1656; CHECK-NEXT: [[TMP104:%.*]] = insertelement <4 x i16> [[TMP78]], i16 [[TMP103]], i64 3 1657; CHECK-NEXT: store <4 x i16> [[TMP104]], <4 x i16> addrspace(1)* [[OUT:%.*]] 1658; CHECK-NEXT: ret void 1659; 1660 %r = srem <4 x i16> %x, %y 1661 store <4 x i16> %r, <4 x i16> addrspace(1)* %out 1662 ret void 1663} 1664 1665define amdgpu_kernel void @udiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { 1666; CHECK-LABEL: @udiv_i3( 1667; CHECK-NEXT: [[TMP1:%.*]] = zext i3 [[X:%.*]] to i32 1668; CHECK-NEXT: [[TMP2:%.*]] = zext i3 [[Y:%.*]] to i32 1669; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP1]] to float 1670; CHECK-NEXT: [[TMP4:%.*]] = uitofp i32 [[TMP2]] to float 1671; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]] 1672; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]] 1673; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) 1674; CHECK-NEXT: [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]] 1675; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) 1676; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 1677; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) 1678; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]]) 1679; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]] 1680; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 1, i32 0 1681; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP10]], [[TMP14]] 1682; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 7 1683; CHECK-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i3 1684; CHECK-NEXT: store i3 [[TMP17]], i3 addrspace(1)* [[OUT:%.*]] 1685; CHECK-NEXT: ret void 1686; 1687 %r = 
udiv i3 %x, %y 1688 store i3 %r, i3 addrspace(1)* %out 1689 ret void 1690} 1691 1692define amdgpu_kernel void @urem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { 1693; CHECK-LABEL: @urem_i3( 1694; CHECK-NEXT: [[TMP1:%.*]] = zext i3 [[X:%.*]] to i32 1695; CHECK-NEXT: [[TMP2:%.*]] = zext i3 [[Y:%.*]] to i32 1696; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP1]] to float 1697; CHECK-NEXT: [[TMP4:%.*]] = uitofp i32 [[TMP2]] to float 1698; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast float 1.000000e+00, [[TMP4]] 1699; CHECK-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP3]], [[TMP5]] 1700; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) 1701; CHECK-NEXT: [[TMP8:%.*]] = fsub fast float -0.000000e+00, [[TMP7]] 1702; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) 1703; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 1704; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) 1705; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]]) 1706; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]] 1707; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 1, i32 0 1708; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP10]], [[TMP14]] 1709; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], [[TMP2]] 1710; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP1]], [[TMP16]] 1711; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 7 1712; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i3 1713; CHECK-NEXT: store i3 [[TMP19]], i3 addrspace(1)* [[OUT:%.*]] 1714; CHECK-NEXT: ret void 1715; 1716 %r = urem i3 %x, %y 1717 store i3 %r, i3 addrspace(1)* %out 1718 ret void 1719} 1720 1721define amdgpu_kernel void @sdiv_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { 1722; CHECK-LABEL: @sdiv_i3( 1723; CHECK-NEXT: [[TMP1:%.*]] = sext i3 [[X:%.*]] to i32 1724; CHECK-NEXT: [[TMP2:%.*]] = sext i3 [[Y:%.*]] to i32 1725; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] 1726; 
CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30 1727; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1 1728; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float 1729; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float 1730; CHECK-NEXT: [[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]] 1731; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] 1732; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) 1733; CHECK-NEXT: [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]] 1734; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) 1735; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 1736; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) 1737; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]]) 1738; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]] 1739; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0 1740; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]] 1741; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i3 1742; CHECK-NEXT: [[TMP20:%.*]] = sext i3 [[TMP19]] to i32 1743; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i3 1744; CHECK-NEXT: store i3 [[TMP21]], i3 addrspace(1)* [[OUT:%.*]] 1745; CHECK-NEXT: ret void 1746; 1747 %r = sdiv i3 %x, %y 1748 store i3 %r, i3 addrspace(1)* %out 1749 ret void 1750} 1751 1752define amdgpu_kernel void @srem_i3(i3 addrspace(1)* %out, i3 %x, i3 %y) { 1753; CHECK-LABEL: @srem_i3( 1754; CHECK-NEXT: [[TMP1:%.*]] = sext i3 [[X:%.*]] to i32 1755; CHECK-NEXT: [[TMP2:%.*]] = sext i3 [[Y:%.*]] to i32 1756; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] 1757; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30 1758; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1 1759; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float 1760; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float 1761; CHECK-NEXT: 
[[TMP8:%.*]] = fdiv fast float 1.000000e+00, [[TMP7]] 1762; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] 1763; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) 1764; CHECK-NEXT: [[TMP11:%.*]] = fsub fast float -0.000000e+00, [[TMP10]] 1765; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) 1766; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 1767; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) 1768; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]]) 1769; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]] 1770; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0 1771; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]] 1772; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], [[TMP2]] 1773; CHECK-NEXT: [[TMP20:%.*]] = sub i32 [[TMP1]], [[TMP19]] 1774; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i3 1775; CHECK-NEXT: [[TMP22:%.*]] = sext i3 [[TMP21]] to i32 1776; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i3 1777; CHECK-NEXT: store i3 [[TMP23]], i3 addrspace(1)* [[OUT:%.*]] 1778; CHECK-NEXT: ret void 1779; 1780 %r = srem i3 %x, %y 1781 store i3 %r, i3 addrspace(1)* %out 1782 ret void 1783} 1784 1785define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) { 1786; CHECK-LABEL: @udiv_v3i16( 1787; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[X:%.*]], i64 0 1788; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i16> [[Y:%.*]], i64 0 1789; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32 1790; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32 1791; CHECK-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP3]] to float 1792; CHECK-NEXT: [[TMP6:%.*]] = uitofp i32 [[TMP4]] to float 1793; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] 1794; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], 
[[TMP7]] 1795; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) 1796; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] 1797; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) 1798; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 1799; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) 1800; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) 1801; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] 1802; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 1, i32 0 1803; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]] 1804; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 65535 1805; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 1806; CHECK-NEXT: [[TMP20:%.*]] = insertelement <3 x i16> undef, i16 [[TMP19]], i64 0 1807; CHECK-NEXT: [[TMP21:%.*]] = extractelement <3 x i16> [[X]], i64 1 1808; CHECK-NEXT: [[TMP22:%.*]] = extractelement <3 x i16> [[Y]], i64 1 1809; CHECK-NEXT: [[TMP23:%.*]] = zext i16 [[TMP21]] to i32 1810; CHECK-NEXT: [[TMP24:%.*]] = zext i16 [[TMP22]] to i32 1811; CHECK-NEXT: [[TMP25:%.*]] = uitofp i32 [[TMP23]] to float 1812; CHECK-NEXT: [[TMP26:%.*]] = uitofp i32 [[TMP24]] to float 1813; CHECK-NEXT: [[TMP27:%.*]] = fdiv fast float 1.000000e+00, [[TMP26]] 1814; CHECK-NEXT: [[TMP28:%.*]] = fmul fast float [[TMP25]], [[TMP27]] 1815; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]]) 1816; CHECK-NEXT: [[TMP30:%.*]] = fsub fast float -0.000000e+00, [[TMP29]] 1817; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]]) 1818; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32 1819; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]]) 1820; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]]) 1821; CHECK-NEXT: 
[[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]] 1822; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 1, i32 0 1823; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP32]], [[TMP36]] 1824; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 65535 1825; CHECK-NEXT: [[TMP39:%.*]] = trunc i32 [[TMP38]] to i16 1826; CHECK-NEXT: [[TMP40:%.*]] = insertelement <3 x i16> [[TMP20]], i16 [[TMP39]], i64 1 1827; CHECK-NEXT: [[TMP41:%.*]] = extractelement <3 x i16> [[X]], i64 2 1828; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x i16> [[Y]], i64 2 1829; CHECK-NEXT: [[TMP43:%.*]] = zext i16 [[TMP41]] to i32 1830; CHECK-NEXT: [[TMP44:%.*]] = zext i16 [[TMP42]] to i32 1831; CHECK-NEXT: [[TMP45:%.*]] = uitofp i32 [[TMP43]] to float 1832; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP44]] to float 1833; CHECK-NEXT: [[TMP47:%.*]] = fdiv fast float 1.000000e+00, [[TMP46]] 1834; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP45]], [[TMP47]] 1835; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]]) 1836; CHECK-NEXT: [[TMP50:%.*]] = fsub fast float -0.000000e+00, [[TMP49]] 1837; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]]) 1838; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32 1839; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]]) 1840; CHECK-NEXT: [[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]]) 1841; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]] 1842; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 1, i32 0 1843; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[TMP52]], [[TMP56]] 1844; CHECK-NEXT: [[TMP58:%.*]] = and i32 [[TMP57]], 65535 1845; CHECK-NEXT: [[TMP59:%.*]] = trunc i32 [[TMP58]] to i16 1846; CHECK-NEXT: [[TMP60:%.*]] = insertelement <3 x i16> [[TMP40]], i16 [[TMP59]], i64 2 1847; CHECK-NEXT: store <3 x i16> [[TMP60]], <3 x i16> addrspace(1)* [[OUT:%.*]] 1848; CHECK-NEXT: ret void 1849; 1850 %r = udiv <3 x 
i16> %x, %y 1851 store <3 x i16> %r, <3 x i16> addrspace(1)* %out 1852 ret void 1853} 1854 1855define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) { 1856; CHECK-LABEL: @urem_v3i16( 1857; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[X:%.*]], i64 0 1858; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i16> [[Y:%.*]], i64 0 1859; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP1]] to i32 1860; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP2]] to i32 1861; CHECK-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP3]] to float 1862; CHECK-NEXT: [[TMP6:%.*]] = uitofp i32 [[TMP4]] to float 1863; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] 1864; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] 1865; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) 1866; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] 1867; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) 1868; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 1869; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) 1870; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) 1871; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] 1872; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 1, i32 0 1873; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]] 1874; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], [[TMP4]] 1875; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP3]], [[TMP18]] 1876; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 65535 1877; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 1878; CHECK-NEXT: [[TMP22:%.*]] = insertelement <3 x i16> undef, i16 [[TMP21]], i64 0 1879; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x i16> [[X]], i64 1 1880; CHECK-NEXT: [[TMP24:%.*]] = extractelement <3 x i16> [[Y]], i64 1 1881; CHECK-NEXT: [[TMP25:%.*]] = zext i16 [[TMP23]] to 
i32 1882; CHECK-NEXT: [[TMP26:%.*]] = zext i16 [[TMP24]] to i32 1883; CHECK-NEXT: [[TMP27:%.*]] = uitofp i32 [[TMP25]] to float 1884; CHECK-NEXT: [[TMP28:%.*]] = uitofp i32 [[TMP26]] to float 1885; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast float 1.000000e+00, [[TMP28]] 1886; CHECK-NEXT: [[TMP30:%.*]] = fmul fast float [[TMP27]], [[TMP29]] 1887; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]]) 1888; CHECK-NEXT: [[TMP32:%.*]] = fsub fast float -0.000000e+00, [[TMP31]] 1889; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]]) 1890; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32 1891; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) 1892; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]]) 1893; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]] 1894; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 1, i32 0 1895; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP34]], [[TMP38]] 1896; CHECK-NEXT: [[TMP40:%.*]] = mul i32 [[TMP39]], [[TMP26]] 1897; CHECK-NEXT: [[TMP41:%.*]] = sub i32 [[TMP25]], [[TMP40]] 1898; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], 65535 1899; CHECK-NEXT: [[TMP43:%.*]] = trunc i32 [[TMP42]] to i16 1900; CHECK-NEXT: [[TMP44:%.*]] = insertelement <3 x i16> [[TMP22]], i16 [[TMP43]], i64 1 1901; CHECK-NEXT: [[TMP45:%.*]] = extractelement <3 x i16> [[X]], i64 2 1902; CHECK-NEXT: [[TMP46:%.*]] = extractelement <3 x i16> [[Y]], i64 2 1903; CHECK-NEXT: [[TMP47:%.*]] = zext i16 [[TMP45]] to i32 1904; CHECK-NEXT: [[TMP48:%.*]] = zext i16 [[TMP46]] to i32 1905; CHECK-NEXT: [[TMP49:%.*]] = uitofp i32 [[TMP47]] to float 1906; CHECK-NEXT: [[TMP50:%.*]] = uitofp i32 [[TMP48]] to float 1907; CHECK-NEXT: [[TMP51:%.*]] = fdiv fast float 1.000000e+00, [[TMP50]] 1908; CHECK-NEXT: [[TMP52:%.*]] = fmul fast float [[TMP49]], [[TMP51]] 1909; CHECK-NEXT: [[TMP53:%.*]] = call fast float 
@llvm.trunc.f32(float [[TMP52]]) 1910; CHECK-NEXT: [[TMP54:%.*]] = fsub fast float -0.000000e+00, [[TMP53]] 1911; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]]) 1912; CHECK-NEXT: [[TMP56:%.*]] = fptoui float [[TMP53]] to i32 1913; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]]) 1914; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]]) 1915; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]] 1916; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 1, i32 0 1917; CHECK-NEXT: [[TMP61:%.*]] = add i32 [[TMP56]], [[TMP60]] 1918; CHECK-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], [[TMP48]] 1919; CHECK-NEXT: [[TMP63:%.*]] = sub i32 [[TMP47]], [[TMP62]] 1920; CHECK-NEXT: [[TMP64:%.*]] = and i32 [[TMP63]], 65535 1921; CHECK-NEXT: [[TMP65:%.*]] = trunc i32 [[TMP64]] to i16 1922; CHECK-NEXT: [[TMP66:%.*]] = insertelement <3 x i16> [[TMP44]], i16 [[TMP65]], i64 2 1923; CHECK-NEXT: store <3 x i16> [[TMP66]], <3 x i16> addrspace(1)* [[OUT:%.*]] 1924; CHECK-NEXT: ret void 1925; 1926 %r = urem <3 x i16> %x, %y 1927 store <3 x i16> %r, <3 x i16> addrspace(1)* %out 1928 ret void 1929} 1930 1931define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) { 1932; CHECK-LABEL: @sdiv_v3i16( 1933; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[X:%.*]], i64 0 1934; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i16> [[Y:%.*]], i64 0 1935; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP1]] to i32 1936; CHECK-NEXT: [[TMP4:%.*]] = sext i16 [[TMP2]] to i32 1937; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] 1938; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 1939; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 1940; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float 1941; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float 1942; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] 1943; 
CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] 1944; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) 1945; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]] 1946; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) 1947; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 1948; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) 1949; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) 1950; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] 1951; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0 1952; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] 1953; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 1954; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32 1955; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 1956; CHECK-NEXT: [[TMP24:%.*]] = insertelement <3 x i16> undef, i16 [[TMP23]], i64 0 1957; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x i16> [[X]], i64 1 1958; CHECK-NEXT: [[TMP26:%.*]] = extractelement <3 x i16> [[Y]], i64 1 1959; CHECK-NEXT: [[TMP27:%.*]] = sext i16 [[TMP25]] to i32 1960; CHECK-NEXT: [[TMP28:%.*]] = sext i16 [[TMP26]] to i32 1961; CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]] 1962; CHECK-NEXT: [[TMP30:%.*]] = ashr i32 [[TMP29]], 30 1963; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP30]], 1 1964; CHECK-NEXT: [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float 1965; CHECK-NEXT: [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float 1966; CHECK-NEXT: [[TMP34:%.*]] = fdiv fast float 1.000000e+00, [[TMP33]] 1967; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]] 1968; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]]) 1969; CHECK-NEXT: [[TMP37:%.*]] = fsub fast float -0.000000e+00, [[TMP36]] 1970; CHECK-NEXT: [[TMP38:%.*]] = call fast float 
@llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]]) 1971; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32 1972; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]]) 1973; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) 1974; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]] 1975; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP31]], i32 0 1976; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP39]], [[TMP43]] 1977; CHECK-NEXT: [[TMP45:%.*]] = trunc i32 [[TMP44]] to i16 1978; CHECK-NEXT: [[TMP46:%.*]] = sext i16 [[TMP45]] to i32 1979; CHECK-NEXT: [[TMP47:%.*]] = trunc i32 [[TMP46]] to i16 1980; CHECK-NEXT: [[TMP48:%.*]] = insertelement <3 x i16> [[TMP24]], i16 [[TMP47]], i64 1 1981; CHECK-NEXT: [[TMP49:%.*]] = extractelement <3 x i16> [[X]], i64 2 1982; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x i16> [[Y]], i64 2 1983; CHECK-NEXT: [[TMP51:%.*]] = sext i16 [[TMP49]] to i32 1984; CHECK-NEXT: [[TMP52:%.*]] = sext i16 [[TMP50]] to i32 1985; CHECK-NEXT: [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]] 1986; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP53]], 30 1987; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP54]], 1 1988; CHECK-NEXT: [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float 1989; CHECK-NEXT: [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float 1990; CHECK-NEXT: [[TMP58:%.*]] = fdiv fast float 1.000000e+00, [[TMP57]] 1991; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]] 1992; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]]) 1993; CHECK-NEXT: [[TMP61:%.*]] = fsub fast float -0.000000e+00, [[TMP60]] 1994; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]]) 1995; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32 1996; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]]) 1997; CHECK-NEXT: [[TMP65:%.*]] = call fast float 
@llvm.fabs.f32(float [[TMP57]]) 1998; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]] 1999; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP66]], i32 [[TMP55]], i32 0 2000; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[TMP63]], [[TMP67]] 2001; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP68]] to i16 2002; CHECK-NEXT: [[TMP70:%.*]] = sext i16 [[TMP69]] to i32 2003; CHECK-NEXT: [[TMP71:%.*]] = trunc i32 [[TMP70]] to i16 2004; CHECK-NEXT: [[TMP72:%.*]] = insertelement <3 x i16> [[TMP48]], i16 [[TMP71]], i64 2 2005; CHECK-NEXT: store <3 x i16> [[TMP72]], <3 x i16> addrspace(1)* [[OUT:%.*]] 2006; CHECK-NEXT: ret void 2007; 2008 %r = sdiv <3 x i16> %x, %y 2009 store <3 x i16> %r, <3 x i16> addrspace(1)* %out 2010 ret void 2011} 2012 2013define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x, <3 x i16> %y) { 2014; CHECK-LABEL: @srem_v3i16( 2015; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[X:%.*]], i64 0 2016; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i16> [[Y:%.*]], i64 0 2017; CHECK-NEXT: [[TMP3:%.*]] = sext i16 [[TMP1]] to i32 2018; CHECK-NEXT: [[TMP4:%.*]] = sext i16 [[TMP2]] to i32 2019; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] 2020; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 2021; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 2022; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float 2023; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float 2024; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]] 2025; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] 2026; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) 2027; CHECK-NEXT: [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]] 2028; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) 2029; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 2030; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float 
[[TMP14]]) 2031; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) 2032; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] 2033; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0 2034; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] 2035; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], [[TMP4]] 2036; CHECK-NEXT: [[TMP22:%.*]] = sub i32 [[TMP3]], [[TMP21]] 2037; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 2038; CHECK-NEXT: [[TMP24:%.*]] = sext i16 [[TMP23]] to i32 2039; CHECK-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 2040; CHECK-NEXT: [[TMP26:%.*]] = insertelement <3 x i16> undef, i16 [[TMP25]], i64 0 2041; CHECK-NEXT: [[TMP27:%.*]] = extractelement <3 x i16> [[X]], i64 1 2042; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x i16> [[Y]], i64 1 2043; CHECK-NEXT: [[TMP29:%.*]] = sext i16 [[TMP27]] to i32 2044; CHECK-NEXT: [[TMP30:%.*]] = sext i16 [[TMP28]] to i32 2045; CHECK-NEXT: [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]] 2046; CHECK-NEXT: [[TMP32:%.*]] = ashr i32 [[TMP31]], 30 2047; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP32]], 1 2048; CHECK-NEXT: [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float 2049; CHECK-NEXT: [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float 2050; CHECK-NEXT: [[TMP36:%.*]] = fdiv fast float 1.000000e+00, [[TMP35]] 2051; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]] 2052; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]]) 2053; CHECK-NEXT: [[TMP39:%.*]] = fsub fast float -0.000000e+00, [[TMP38]] 2054; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]]) 2055; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32 2056; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]]) 2057; CHECK-NEXT: [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]]) 2058; CHECK-NEXT: [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]] 
2059; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP33]], i32 0 2060; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP41]], [[TMP45]] 2061; CHECK-NEXT: [[TMP47:%.*]] = mul i32 [[TMP46]], [[TMP30]] 2062; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP29]], [[TMP47]] 2063; CHECK-NEXT: [[TMP49:%.*]] = trunc i32 [[TMP48]] to i16 2064; CHECK-NEXT: [[TMP50:%.*]] = sext i16 [[TMP49]] to i32 2065; CHECK-NEXT: [[TMP51:%.*]] = trunc i32 [[TMP50]] to i16 2066; CHECK-NEXT: [[TMP52:%.*]] = insertelement <3 x i16> [[TMP26]], i16 [[TMP51]], i64 1 2067; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x i16> [[X]], i64 2 2068; CHECK-NEXT: [[TMP54:%.*]] = extractelement <3 x i16> [[Y]], i64 2 2069; CHECK-NEXT: [[TMP55:%.*]] = sext i16 [[TMP53]] to i32 2070; CHECK-NEXT: [[TMP56:%.*]] = sext i16 [[TMP54]] to i32 2071; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]] 2072; CHECK-NEXT: [[TMP58:%.*]] = ashr i32 [[TMP57]], 30 2073; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP58]], 1 2074; CHECK-NEXT: [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float 2075; CHECK-NEXT: [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float 2076; CHECK-NEXT: [[TMP62:%.*]] = fdiv fast float 1.000000e+00, [[TMP61]] 2077; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]] 2078; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]]) 2079; CHECK-NEXT: [[TMP65:%.*]] = fsub fast float -0.000000e+00, [[TMP64]] 2080; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]]) 2081; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32 2082; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]]) 2083; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]]) 2084; CHECK-NEXT: [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]] 2085; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[TMP59]], i32 0 2086; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[TMP67]], [[TMP71]] 2087; CHECK-NEXT: 
[[TMP73:%.*]] = mul i32 [[TMP72]], [[TMP56]] 2088; CHECK-NEXT: [[TMP74:%.*]] = sub i32 [[TMP55]], [[TMP73]] 2089; CHECK-NEXT: [[TMP75:%.*]] = trunc i32 [[TMP74]] to i16 2090; CHECK-NEXT: [[TMP76:%.*]] = sext i16 [[TMP75]] to i32 2091; CHECK-NEXT: [[TMP77:%.*]] = trunc i32 [[TMP76]] to i16 2092; CHECK-NEXT: [[TMP78:%.*]] = insertelement <3 x i16> [[TMP52]], i16 [[TMP77]], i64 2 2093; CHECK-NEXT: store <3 x i16> [[TMP78]], <3 x i16> addrspace(1)* [[OUT:%.*]] 2094; CHECK-NEXT: ret void 2095; 2096 %r = srem <3 x i16> %x, %y 2097 store <3 x i16> %r, <3 x i16> addrspace(1)* %out 2098 ret void 2099} 2100 2101define amdgpu_kernel void @udiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) { 2102; CHECK-LABEL: @udiv_v3i15( 2103; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i15> [[X:%.*]], i64 0 2104; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i15> [[Y:%.*]], i64 0 2105; CHECK-NEXT: [[TMP3:%.*]] = zext i15 [[TMP1]] to i32 2106; CHECK-NEXT: [[TMP4:%.*]] = zext i15 [[TMP2]] to i32 2107; CHECK-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP3]] to float 2108; CHECK-NEXT: [[TMP6:%.*]] = uitofp i32 [[TMP4]] to float 2109; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]] 2110; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]] 2111; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) 2112; CHECK-NEXT: [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]] 2113; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) 2114; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 2115; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) 2116; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) 2117; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] 2118; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 1, i32 0 2119; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]] 2120; CHECK-NEXT: 
[[TMP18:%.*]] = and i32 [[TMP17]], 32767 2121; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i15 2122; CHECK-NEXT: [[TMP20:%.*]] = insertelement <3 x i15> undef, i15 [[TMP19]], i64 0 2123; CHECK-NEXT: [[TMP21:%.*]] = extractelement <3 x i15> [[X]], i64 1 2124; CHECK-NEXT: [[TMP22:%.*]] = extractelement <3 x i15> [[Y]], i64 1 2125; CHECK-NEXT: [[TMP23:%.*]] = zext i15 [[TMP21]] to i32 2126; CHECK-NEXT: [[TMP24:%.*]] = zext i15 [[TMP22]] to i32 2127; CHECK-NEXT: [[TMP25:%.*]] = uitofp i32 [[TMP23]] to float 2128; CHECK-NEXT: [[TMP26:%.*]] = uitofp i32 [[TMP24]] to float 2129; CHECK-NEXT: [[TMP27:%.*]] = fdiv fast float 1.000000e+00, [[TMP26]] 2130; CHECK-NEXT: [[TMP28:%.*]] = fmul fast float [[TMP25]], [[TMP27]] 2131; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]]) 2132; CHECK-NEXT: [[TMP30:%.*]] = fsub fast float -0.000000e+00, [[TMP29]] 2133; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]]) 2134; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32 2135; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]]) 2136; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]]) 2137; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]] 2138; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 1, i32 0 2139; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP32]], [[TMP36]] 2140; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 32767 2141; CHECK-NEXT: [[TMP39:%.*]] = trunc i32 [[TMP38]] to i15 2142; CHECK-NEXT: [[TMP40:%.*]] = insertelement <3 x i15> [[TMP20]], i15 [[TMP39]], i64 1 2143; CHECK-NEXT: [[TMP41:%.*]] = extractelement <3 x i15> [[X]], i64 2 2144; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x i15> [[Y]], i64 2 2145; CHECK-NEXT: [[TMP43:%.*]] = zext i15 [[TMP41]] to i32 2146; CHECK-NEXT: [[TMP44:%.*]] = zext i15 [[TMP42]] to i32 2147; CHECK-NEXT: [[TMP45:%.*]] = uitofp i32 [[TMP43]] to float 2148; CHECK-NEXT: 
[[TMP46:%.*]] = uitofp i32 [[TMP44]] to float
; CHECK-NEXT:    [[TMP47:%.*]] = fdiv fast float 1.000000e+00, [[TMP46]]
; CHECK-NEXT:    [[TMP48:%.*]] = fmul fast float [[TMP45]], [[TMP47]]
; CHECK-NEXT:    [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]])
; CHECK-NEXT:    [[TMP50:%.*]] = fsub fast float -0.000000e+00, [[TMP49]]
; CHECK-NEXT:    [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]])
; CHECK-NEXT:    [[TMP52:%.*]] = fptoui float [[TMP49]] to i32
; CHECK-NEXT:    [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]])
; CHECK-NEXT:    [[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]])
; CHECK-NEXT:    [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]]
; CHECK-NEXT:    [[TMP56:%.*]] = select i1 [[TMP55]], i32 1, i32 0
; CHECK-NEXT:    [[TMP57:%.*]] = add i32 [[TMP52]], [[TMP56]]
; CHECK-NEXT:    [[TMP58:%.*]] = and i32 [[TMP57]], 32767
; CHECK-NEXT:    [[TMP59:%.*]] = trunc i32 [[TMP58]] to i15
; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <3 x i15> [[TMP40]], i15 [[TMP59]], i64 2
; CHECK-NEXT:    store <3 x i15> [[TMP60]], <3 x i15> addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %r = udiv <3 x i15> %x, %y
  store <3 x i15> %r, <3 x i15> addrspace(1)* %out
  ret void
}

; Unsigned remainder of two <3 x i15> vectors.
; The expected output is fully scalarized. For each of the three lanes the
; operands are zero-extended to i32 and converted to float, a quotient is
; formed with a reciprocal multiply followed by trunc, and an fmad/fabs/fcmp
; sequence selects an extra +1 for the quotient. The remainder is then
; x - q * y, masked with 32767 (15 bits) and inserted back into the result.
define amdgpu_kernel void @urem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) {
; CHECK-LABEL: @urem_v3i15(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <3 x i15> [[X:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x i15> [[Y:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = zext i15 [[TMP1]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = zext i15 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = uitofp i32 [[TMP3]] to float
; CHECK-NEXT:    [[TMP6:%.*]] = uitofp i32 [[TMP4]] to float
; CHECK-NEXT:    [[TMP7:%.*]] = fdiv fast float 1.000000e+00, [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast float [[TMP5]], [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]])
; CHECK-NEXT:    [[TMP10:%.*]] = fsub fast float -0.000000e+00, [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]])
; CHECK-NEXT:    [[TMP12:%.*]] = fptoui float [[TMP9]] to i32
; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]])
; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]])
; CHECK-NEXT:    [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], i32 1, i32 0
; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP12]], [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = mul i32 [[TMP17]], [[TMP4]]
; CHECK-NEXT:    [[TMP19:%.*]] = sub i32 [[TMP3]], [[TMP18]]
; CHECK-NEXT:    [[TMP20:%.*]] = and i32 [[TMP19]], 32767
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i15
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <3 x i15> undef, i15 [[TMP21]], i64 0
; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <3 x i15> [[X]], i64 1
; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <3 x i15> [[Y]], i64 1
; CHECK-NEXT:    [[TMP25:%.*]] = zext i15 [[TMP23]] to i32
; CHECK-NEXT:    [[TMP26:%.*]] = zext i15 [[TMP24]] to i32
; CHECK-NEXT:    [[TMP27:%.*]] = uitofp i32 [[TMP25]] to float
; CHECK-NEXT:    [[TMP28:%.*]] = uitofp i32 [[TMP26]] to float
; CHECK-NEXT:    [[TMP29:%.*]] = fdiv fast float 1.000000e+00, [[TMP28]]
; CHECK-NEXT:    [[TMP30:%.*]] = fmul fast float [[TMP27]], [[TMP29]]
; CHECK-NEXT:    [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]])
; CHECK-NEXT:    [[TMP32:%.*]] = fsub fast float -0.000000e+00, [[TMP31]]
; CHECK-NEXT:    [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]])
; CHECK-NEXT:    [[TMP34:%.*]] = fptoui float [[TMP31]] to i32
; CHECK-NEXT:    [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
; CHECK-NEXT:    [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]])
; CHECK-NEXT:    [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]]
; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], i32 1, i32 0
; CHECK-NEXT:    [[TMP39:%.*]] = add i32 [[TMP34]], [[TMP38]]
; CHECK-NEXT:    [[TMP40:%.*]] = mul i32 [[TMP39]], [[TMP26]]
; CHECK-NEXT:    [[TMP41:%.*]] = sub i32 [[TMP25]], [[TMP40]]
; CHECK-NEXT:    [[TMP42:%.*]] = and i32 [[TMP41]], 32767
; CHECK-NEXT:    [[TMP43:%.*]] = trunc i32 [[TMP42]] to i15
; CHECK-NEXT:    [[TMP44:%.*]] = insertelement <3 x i15> [[TMP22]], i15 [[TMP43]], i64 1
; CHECK-NEXT:    [[TMP45:%.*]] = extractelement <3 x i15> [[X]], i64 2
; CHECK-NEXT:    [[TMP46:%.*]] = extractelement <3 x i15> [[Y]], i64 2
; CHECK-NEXT:    [[TMP47:%.*]] = zext i15 [[TMP45]] to i32
; CHECK-NEXT:    [[TMP48:%.*]] = zext i15 [[TMP46]] to i32
; CHECK-NEXT:    [[TMP49:%.*]] = uitofp i32 [[TMP47]] to float
; CHECK-NEXT:    [[TMP50:%.*]] = uitofp i32 [[TMP48]] to float
; CHECK-NEXT:    [[TMP51:%.*]] = fdiv fast float 1.000000e+00, [[TMP50]]
; CHECK-NEXT:    [[TMP52:%.*]] = fmul fast float [[TMP49]], [[TMP51]]
; CHECK-NEXT:    [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]])
; CHECK-NEXT:    [[TMP54:%.*]] = fsub fast float -0.000000e+00, [[TMP53]]
; CHECK-NEXT:    [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]])
; CHECK-NEXT:    [[TMP56:%.*]] = fptoui float [[TMP53]] to i32
; CHECK-NEXT:    [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]])
; CHECK-NEXT:    [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]])
; CHECK-NEXT:    [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]]
; CHECK-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], i32 1, i32 0
; CHECK-NEXT:    [[TMP61:%.*]] = add i32 [[TMP56]], [[TMP60]]
; CHECK-NEXT:    [[TMP62:%.*]] = mul i32 [[TMP61]], [[TMP48]]
; CHECK-NEXT:    [[TMP63:%.*]] = sub i32 [[TMP47]], [[TMP62]]
; CHECK-NEXT:    [[TMP64:%.*]] = and i32 [[TMP63]], 32767
; CHECK-NEXT:    [[TMP65:%.*]] = trunc i32 [[TMP64]] to i15
; CHECK-NEXT:    [[TMP66:%.*]] = insertelement <3 x i15> [[TMP44]], i15 [[TMP65]], i64 2
; CHECK-NEXT:    store <3 x i15> [[TMP66]], <3 x i15> addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %rem = urem <3 x i15> %x, %y
  store <3 x i15> %rem, <3 x i15> addrspace(1)* %out
  ret void
}

; Signed division of two <3 x i15> vectors.
; The expected output is fully scalarized. Per lane, both operands are
; sign-extended to i32; (x ^ y) >> 30 | 1 provides the signed unit that is
; added to the quotient when the fmad/fabs/fcmp test fires. The quotient
; itself comes from a float reciprocal multiply followed by trunc and fptosi.
; Each lane result goes through trunc/sext/trunc to i15 before being inserted
; into the result vector.
define amdgpu_kernel void @sdiv_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) {
; CHECK-LABEL: @sdiv_v3i15(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <3 x i15> [[X:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x i15> [[Y:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = sext i15 [[TMP1]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = sext i15 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
; CHECK-NEXT:    [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
; CHECK-NEXT:    [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
; CHECK-NEXT:    [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
; CHECK-NEXT:    [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
; CHECK-NEXT:    [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
; CHECK-NEXT:    [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i15
; CHECK-NEXT:    [[TMP22:%.*]] = sext i15 [[TMP21]] to i32
; CHECK-NEXT:    [[TMP23:%.*]] = trunc i32 [[TMP22]] to i15
; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <3 x i15> undef, i15 [[TMP23]], i64 0
; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <3 x i15> [[X]], i64 1
; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <3 x i15> [[Y]], i64 1
; CHECK-NEXT:    [[TMP27:%.*]] = sext i15 [[TMP25]] to i32
; CHECK-NEXT:    [[TMP28:%.*]] = sext i15 [[TMP26]] to i32
; CHECK-NEXT:    [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]]
; CHECK-NEXT:    [[TMP30:%.*]] = ashr i32 [[TMP29]], 30
; CHECK-NEXT:    [[TMP31:%.*]] = or i32 [[TMP30]], 1
; CHECK-NEXT:    [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float
; CHECK-NEXT:    [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float
; CHECK-NEXT:    [[TMP34:%.*]] = fdiv fast float 1.000000e+00, [[TMP33]]
; CHECK-NEXT:    [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]]
; CHECK-NEXT:    [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]])
; CHECK-NEXT:    [[TMP37:%.*]] = fsub fast float -0.000000e+00, [[TMP36]]
; CHECK-NEXT:    [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]])
; CHECK-NEXT:    [[TMP39:%.*]] = fptosi float [[TMP36]] to i32
; CHECK-NEXT:    [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]])
; CHECK-NEXT:    [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]])
; CHECK-NEXT:    [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]]
; CHECK-NEXT:    [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP31]], i32 0
; CHECK-NEXT:    [[TMP44:%.*]] = add i32 [[TMP39]], [[TMP43]]
; CHECK-NEXT:    [[TMP45:%.*]] = trunc i32 [[TMP44]] to i15
; CHECK-NEXT:    [[TMP46:%.*]] = sext i15 [[TMP45]] to i32
; CHECK-NEXT:    [[TMP47:%.*]] = trunc i32 [[TMP46]] to i15
; CHECK-NEXT:    [[TMP48:%.*]] = insertelement <3 x i15> [[TMP24]], i15 [[TMP47]], i64 1
; CHECK-NEXT:    [[TMP49:%.*]] = extractelement <3 x i15> [[X]], i64 2
; CHECK-NEXT:    [[TMP50:%.*]] = extractelement <3 x i15> [[Y]], i64 2
; CHECK-NEXT:    [[TMP51:%.*]] = sext i15 [[TMP49]] to i32
; CHECK-NEXT:    [[TMP52:%.*]] = sext i15 [[TMP50]] to i32
; CHECK-NEXT:    [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]]
; CHECK-NEXT:    [[TMP54:%.*]] = ashr i32 [[TMP53]], 30
; CHECK-NEXT:    [[TMP55:%.*]] = or i32 [[TMP54]], 1
; CHECK-NEXT:    [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float
; CHECK-NEXT:    [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float
; CHECK-NEXT:    [[TMP58:%.*]] = fdiv fast float 1.000000e+00, [[TMP57]]
; CHECK-NEXT:    [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]]
; CHECK-NEXT:    [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]])
; CHECK-NEXT:    [[TMP61:%.*]] = fsub fast float -0.000000e+00, [[TMP60]]
; CHECK-NEXT:    [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]])
; CHECK-NEXT:    [[TMP63:%.*]] = fptosi float [[TMP60]] to i32
; CHECK-NEXT:    [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]])
; CHECK-NEXT:    [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]])
; CHECK-NEXT:    [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]]
; CHECK-NEXT:    [[TMP67:%.*]] = select i1 [[TMP66]], i32 [[TMP55]], i32 0
; CHECK-NEXT:    [[TMP68:%.*]] = add i32 [[TMP63]], [[TMP67]]
; CHECK-NEXT:    [[TMP69:%.*]] = trunc i32 [[TMP68]] to i15
; CHECK-NEXT:    [[TMP70:%.*]] = sext i15 [[TMP69]] to i32
; CHECK-NEXT:    [[TMP71:%.*]] = trunc i32 [[TMP70]] to i15
; CHECK-NEXT:    [[TMP72:%.*]] = insertelement <3 x i15> [[TMP48]], i15 [[TMP71]], i64 2
; CHECK-NEXT:    store <3 x i15> [[TMP72]], <3 x i15> addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %quot = sdiv <3 x i15> %x, %y
  store <3 x i15> %quot, <3 x i15> addrspace(1)* %out
  ret void
}

; Signed remainder of two <3 x i15> vectors.
; The expected output is fully scalarized. Per lane, the quotient is built
; exactly as in the signed-division expansion (sext to i32, float reciprocal
; multiply, trunc, fptosi, signed-unit adjustment). The remainder is then
; x - q * y, passed through trunc/sext/trunc to i15 and inserted into the
; result vector.
define amdgpu_kernel void @srem_v3i15(<3 x i15> addrspace(1)* %out, <3 x i15> %x, <3 x i15> %y) {
; CHECK-LABEL: @srem_v3i15(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <3 x i15> [[X:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x i15> [[Y:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = sext i15 [[TMP1]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = sext i15 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = ashr i32 [[TMP5]], 30
; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float
; CHECK-NEXT:    [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float
; CHECK-NEXT:    [[TMP10:%.*]] = fdiv fast float 1.000000e+00, [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]])
; CHECK-NEXT:    [[TMP13:%.*]] = fsub fast float -0.000000e+00, [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]])
; CHECK-NEXT:    [[TMP15:%.*]] = fptosi float [[TMP12]] to i32
; CHECK-NEXT:    [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]])
; CHECK-NEXT:    [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]])
; CHECK-NEXT:    [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = mul i32 [[TMP20]], [[TMP4]]
; CHECK-NEXT:    [[TMP22:%.*]] = sub i32 [[TMP3]], [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = trunc i32 [[TMP22]] to i15
; CHECK-NEXT:    [[TMP24:%.*]] = sext i15 [[TMP23]] to i32
; CHECK-NEXT:    [[TMP25:%.*]] = trunc i32 [[TMP24]] to i15
; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <3 x i15> undef, i15 [[TMP25]], i64 0
; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <3 x i15> [[X]], i64 1
; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <3 x i15> [[Y]], i64 1
; CHECK-NEXT:    [[TMP29:%.*]] = sext i15 [[TMP27]] to i32
; CHECK-NEXT:    [[TMP30:%.*]] = sext i15 [[TMP28]] to i32
; CHECK-NEXT:    [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]]
; CHECK-NEXT:    [[TMP32:%.*]] = ashr i32 [[TMP31]], 30
; CHECK-NEXT:    [[TMP33:%.*]] = or i32 [[TMP32]], 1
; CHECK-NEXT:    [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float
; CHECK-NEXT:    [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float
; CHECK-NEXT:    [[TMP36:%.*]] = fdiv fast float 1.000000e+00, [[TMP35]]
; CHECK-NEXT:    [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]]
; CHECK-NEXT:    [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]])
; CHECK-NEXT:    [[TMP39:%.*]] = fsub fast float -0.000000e+00, [[TMP38]]
; CHECK-NEXT:    [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]])
; CHECK-NEXT:    [[TMP41:%.*]] = fptosi float [[TMP38]] to i32
; CHECK-NEXT:    [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]])
; CHECK-NEXT:    [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]])
; CHECK-NEXT:    [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]]
; CHECK-NEXT:    [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP33]], i32 0
; CHECK-NEXT:    [[TMP46:%.*]] = add i32 [[TMP41]], [[TMP45]]
; CHECK-NEXT:    [[TMP47:%.*]] = mul i32 [[TMP46]], [[TMP30]]
; CHECK-NEXT:    [[TMP48:%.*]] = sub i32 [[TMP29]], [[TMP47]]
; CHECK-NEXT:    [[TMP49:%.*]] = trunc i32 [[TMP48]] to i15
; CHECK-NEXT:    [[TMP50:%.*]] = sext i15 [[TMP49]] to i32
; CHECK-NEXT:    [[TMP51:%.*]] = trunc i32 [[TMP50]] to i15
; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <3 x i15> [[TMP26]], i15 [[TMP51]], i64 1
; CHECK-NEXT:    [[TMP53:%.*]] = extractelement <3 x i15> [[X]], i64 2
; CHECK-NEXT:    [[TMP54:%.*]] = extractelement <3 x i15> [[Y]], i64 2
; CHECK-NEXT:    [[TMP55:%.*]] = sext i15 [[TMP53]] to i32
; CHECK-NEXT:    [[TMP56:%.*]] = sext i15 [[TMP54]] to i32
; CHECK-NEXT:    [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]]
; CHECK-NEXT:    [[TMP58:%.*]] = ashr i32 [[TMP57]], 30
; CHECK-NEXT:    [[TMP59:%.*]] = or i32 [[TMP58]], 1
; CHECK-NEXT:    [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float
; CHECK-NEXT:    [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float
; CHECK-NEXT:    [[TMP62:%.*]] = fdiv fast float 1.000000e+00, [[TMP61]]
; CHECK-NEXT:    [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]]
; CHECK-NEXT:    [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]])
; CHECK-NEXT:    [[TMP65:%.*]] = fsub fast float -0.000000e+00, [[TMP64]]
; CHECK-NEXT:    [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]])
; CHECK-NEXT:    [[TMP67:%.*]] = fptosi float [[TMP64]] to i32
; CHECK-NEXT:    [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]])
; CHECK-NEXT:    [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]])
; CHECK-NEXT:    [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]]
; CHECK-NEXT:    [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[TMP59]], i32 0
; CHECK-NEXT:    [[TMP72:%.*]] = add i32 [[TMP67]], [[TMP71]]
; CHECK-NEXT:    [[TMP73:%.*]] = mul i32 [[TMP72]], [[TMP56]]
; CHECK-NEXT:    [[TMP74:%.*]] = sub i32 [[TMP55]], [[TMP73]]
; CHECK-NEXT:    [[TMP75:%.*]] = trunc i32 [[TMP74]] to i15
; CHECK-NEXT:    [[TMP76:%.*]] = sext i15 [[TMP75]] to i32
; CHECK-NEXT:    [[TMP77:%.*]] = trunc i32 [[TMP76]] to i15
; CHECK-NEXT:    [[TMP78:%.*]] = insertelement <3 x i15> [[TMP52]], i15 [[TMP77]], i64 2
; CHECK-NEXT:    store <3 x i15> [[TMP78]], <3 x i15> addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT:    ret void
;
  %rem = srem <3 x i15> %x, %y
  store <3 x i15> %rem, <3 x i15> addrspace(1)* %out
  ret void
}