; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefix=RV32I
; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefix=RV64I
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+experimental-zbb | FileCheck %s --check-prefixes=RV32IZbb,RV32IZbbNOZbt
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+experimental-zbb | FileCheck %s --check-prefixes=RV64IZbb,RV64IZbbNOZbt
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+experimental-zbb,+experimental-zbt | FileCheck %s --check-prefixes=RV32IZbb,RV32IZbbZbt
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+experimental-zbb,+experimental-zbt | FileCheck %s --check-prefixes=RV64IZbb,RV64IZbbZbt

; Tests lowering of the saturating signed-add intrinsics where one operand is
; the result of a multiply, for each integer width, with and without the Zbb
; (min/max) and Zbt (cmov) bit-manipulation extensions.

declare i4 @llvm.sadd.sat.i4(i4, i4)
declare i8 @llvm.sadd.sat.i8(i8, i8)
declare i16 @llvm.sadd.sat.i16(i16, i16)
declare i32 @llvm.sadd.sat.i32(i32, i32)
declare i64 @llvm.sadd.sat.i64(i64, i64)

define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
; RV32I-LABEL: func32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    mv a3, a0
; RV32I-NEXT:    mul a1, a1, a2
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    slt a2, a0, a3
; RV32I-NEXT:    slti a1, a1, 0
; RV32I-NEXT:    beq a1, a2, .LBB0_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    slti a0, a0, 0
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    sub a0, a1, a0
; RV32I-NEXT:  .LBB0_2:
; RV32I-NEXT:    ret
;
; RV64I-LABEL: func32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    mulw a1, a1, a2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    addiw a2, a1, -1
; RV64I-NEXT:    bge a0, a2, .LBB0_3
; RV64I-NEXT:  # %bb.1:
; RV64I-NEXT:    bge a1, a0, .LBB0_4
; RV64I-NEXT:  .LBB0_2:
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB0_3:
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    blt a1, a0, .LBB0_2
; RV64I-NEXT:  .LBB0_4:
; RV64I-NEXT:    lui a0, 524288
; RV64I-NEXT:    ret
;
; RV32IZbbNOZbt-LABEL: func32:
; RV32IZbbNOZbt:       # %bb.0:
; RV32IZbbNOZbt-NEXT:    mv a3, a0
; RV32IZbbNOZbt-NEXT:    mul a1, a1, a2
; RV32IZbbNOZbt-NEXT:    add a0, a0, a1
; RV32IZbbNOZbt-NEXT:    slt a2, a0, a3
; RV32IZbbNOZbt-NEXT:    slti a1, a1, 0
; RV32IZbbNOZbt-NEXT:    beq a1, a2, .LBB0_2
; RV32IZbbNOZbt-NEXT:  # %bb.1:
; RV32IZbbNOZbt-NEXT:    slti a0, a0, 0
; RV32IZbbNOZbt-NEXT:    lui a1, 524288
; RV32IZbbNOZbt-NEXT:    sub a0, a1, a0
; RV32IZbbNOZbt-NEXT:  .LBB0_2:
; RV32IZbbNOZbt-NEXT:    ret
;
; RV64IZbb-LABEL: func32:
; RV64IZbb:       # %bb.0:
; RV64IZbb-NEXT:    sext.w a0, a0
; RV64IZbb-NEXT:    mulw a1, a1, a2
; RV64IZbb-NEXT:    add a0, a0, a1
; RV64IZbb-NEXT:    lui a1, 524288
; RV64IZbb-NEXT:    addiw a2, a1, -1
; RV64IZbb-NEXT:    min a0, a0, a2
; RV64IZbb-NEXT:    max a0, a0, a1
; RV64IZbb-NEXT:    ret
;
; RV32IZbbZbt-LABEL: func32:
; RV32IZbbZbt:       # %bb.0:
; RV32IZbbZbt-NEXT:    mul a1, a1, a2
; RV32IZbbZbt-NEXT:    add a2, a0, a1
; RV32IZbbZbt-NEXT:    slt a0, a2, a0
; RV32IZbbZbt-NEXT:    slti a1, a1, 0
; RV32IZbbZbt-NEXT:    xor a0, a1, a0
; RV32IZbbZbt-NEXT:    slti a1, a2, 0
; RV32IZbbZbt-NEXT:    lui a3, 524288
; RV32IZbbZbt-NEXT:    addi a4, a3, -1
; RV32IZbbZbt-NEXT:    cmov a1, a1, a4, a3
; RV32IZbbZbt-NEXT:    cmov a0, a0, a1, a2
; RV32IZbbZbt-NEXT:    ret
  %a = mul i32 %y, %z
  %tmp = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %a)
  ret i32 %tmp
}

; NOTE(review): unlike the other functions, this one passes %z (not the mul
; result %a) to the intrinsic, leaving the mul dead. The generated checks below
; contain no mul, so they match this IR as written; confirm with the original
; author whether %z was intended before "fixing" it, and regenerate the checks
; with update_llc_test_checks.py if it is changed.
define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; RV32I-LABEL: func64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    mv a2, a1
; RV32I-NEXT:    mv a1, a0
; RV32I-NEXT:    add a3, a2, a5
; RV32I-NEXT:    add a0, a0, a4
; RV32I-NEXT:    sltu a1, a0, a1
; RV32I-NEXT:    add a1, a3, a1
; RV32I-NEXT:    xor a3, a2, a1
; RV32I-NEXT:    xor a2, a2, a5
; RV32I-NEXT:    not a2, a2
; RV32I-NEXT:    and a2, a2, a3
; RV32I-NEXT:    bgez a2, .LBB1_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    slti a0, a1, 0
; RV32I-NEXT:    lui a2, 524288
; RV32I-NEXT:    sub a2, a2, a0
; RV32I-NEXT:    srai a0, a1, 31
; RV32I-NEXT:    mv a1, a2
; RV32I-NEXT:  .LBB1_2:
; RV32I-NEXT:    ret
;
; RV64I-LABEL: func64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    mv a1, a0
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    slt a1, a0, a1
; RV64I-NEXT:    slti a2, a2, 0
; RV64I-NEXT:    beq a2, a1, .LBB1_2
; RV64I-NEXT:  # %bb.1:
; RV64I-NEXT:    slti a0, a0, 0
; RV64I-NEXT:    addi a1, zero, -1
; RV64I-NEXT:    slli a1, a1, 63
; RV64I-NEXT:    sub a0, a1, a0
; RV64I-NEXT:  .LBB1_2:
; RV64I-NEXT:    ret
;
; RV32IZbbNOZbt-LABEL: func64:
; RV32IZbbNOZbt:       # %bb.0:
; RV32IZbbNOZbt-NEXT:    mv a2, a1
; RV32IZbbNOZbt-NEXT:    mv a1, a0
; RV32IZbbNOZbt-NEXT:    add a3, a2, a5
; RV32IZbbNOZbt-NEXT:    add a0, a0, a4
; RV32IZbbNOZbt-NEXT:    sltu a1, a0, a1
; RV32IZbbNOZbt-NEXT:    add a1, a3, a1
; RV32IZbbNOZbt-NEXT:    xor a3, a2, a1
; RV32IZbbNOZbt-NEXT:    xor a2, a2, a5
; RV32IZbbNOZbt-NEXT:    andn a2, a3, a2
; RV32IZbbNOZbt-NEXT:    bgez a2, .LBB1_2
; RV32IZbbNOZbt-NEXT:  # %bb.1:
; RV32IZbbNOZbt-NEXT:    slti a0, a1, 0
; RV32IZbbNOZbt-NEXT:    lui a2, 524288
; RV32IZbbNOZbt-NEXT:    sub a2, a2, a0
; RV32IZbbNOZbt-NEXT:    srai a0, a1, 31
; RV32IZbbNOZbt-NEXT:    mv a1, a2
; RV32IZbbNOZbt-NEXT:  .LBB1_2:
; RV32IZbbNOZbt-NEXT:    ret
;
; RV64IZbbNOZbt-LABEL: func64:
; RV64IZbbNOZbt:       # %bb.0:
; RV64IZbbNOZbt-NEXT:    mv a1, a0
; RV64IZbbNOZbt-NEXT:    add a0, a0, a2
; RV64IZbbNOZbt-NEXT:    slt a1, a0, a1
; RV64IZbbNOZbt-NEXT:    slti a2, a2, 0
; RV64IZbbNOZbt-NEXT:    beq a2, a1, .LBB1_2
; RV64IZbbNOZbt-NEXT:  # %bb.1:
; RV64IZbbNOZbt-NEXT:    slti a0, a0, 0
; RV64IZbbNOZbt-NEXT:    addi a1, zero, -1
; RV64IZbbNOZbt-NEXT:    slli a1, a1, 63
; RV64IZbbNOZbt-NEXT:    sub a0, a1, a0
; RV64IZbbNOZbt-NEXT:  .LBB1_2:
; RV64IZbbNOZbt-NEXT:    ret
;
; RV32IZbbZbt-LABEL: func64:
; RV32IZbbZbt:       # %bb.0:
; RV32IZbbZbt-NEXT:    add a2, a1, a5
; RV32IZbbZbt-NEXT:    add a3, a0, a4
; RV32IZbbZbt-NEXT:    sltu a0, a3, a0
; RV32IZbbZbt-NEXT:    add a0, a2, a0
; RV32IZbbZbt-NEXT:    slti a2, a0, 0
; RV32IZbbZbt-NEXT:    lui a6, 524288
; RV32IZbbZbt-NEXT:    addi a4, a6, -1
; RV32IZbbZbt-NEXT:    cmov a2, a2, a4, a6
; RV32IZbbZbt-NEXT:    xor a4, a1, a0
; RV32IZbbZbt-NEXT:    xor a1, a1, a5
; RV32IZbbZbt-NEXT:    andn a1, a4, a1
; RV32IZbbZbt-NEXT:    slti a4, a1, 0
; RV32IZbbZbt-NEXT:    cmov a1, a4, a2, a0
; RV32IZbbZbt-NEXT:    srai a0, a0, 31
; RV32IZbbZbt-NEXT:    cmov a0, a4, a0, a3
; RV32IZbbZbt-NEXT:    ret
;
; RV64IZbbZbt-LABEL: func64:
; RV64IZbbZbt:       # %bb.0:
; RV64IZbbZbt-NEXT:    add a1, a0, a2
; RV64IZbbZbt-NEXT:    slti a3, a1, 0
; RV64IZbbZbt-NEXT:    addi a4, zero, -1
; RV64IZbbZbt-NEXT:    slli a5, a4, 63
; RV64IZbbZbt-NEXT:    srli a4, a4, 1
; RV64IZbbZbt-NEXT:    cmov a3, a3, a4, a5
; RV64IZbbZbt-NEXT:    slt a0, a1, a0
; RV64IZbbZbt-NEXT:    slti a2, a2, 0
; RV64IZbbZbt-NEXT:    xor a0, a2, a0
; RV64IZbbZbt-NEXT:    cmov a0, a0, a3, a1
; RV64IZbbZbt-NEXT:    ret
  %a = mul i64 %y, %z
  %tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %z)
  ret i64 %tmp
}

define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
; RV32I-LABEL: func16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    slli a0, a0, 16
; RV32I-NEXT:    srai a0, a0, 16
; RV32I-NEXT:    mul a1, a1, a2
; RV32I-NEXT:    slli a1, a1, 16
; RV32I-NEXT:    srai a1, a1, 16
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    lui a1, 8
; RV32I-NEXT:    addi a1, a1, -1
; RV32I-NEXT:    bge a0, a1, .LBB2_3
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    lui a1, 1048568
; RV32I-NEXT:    bge a1, a0, .LBB2_4
; RV32I-NEXT:  .LBB2_2:
; RV32I-NEXT:    ret
; RV32I-NEXT:  .LBB2_3:
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:    lui a1, 1048568
; RV32I-NEXT:    blt a1, a0, .LBB2_2
; RV32I-NEXT:  .LBB2_4:
; RV32I-NEXT:    lui a0, 1048568
; RV32I-NEXT:    ret
;
; RV64I-LABEL: func16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srai a0, a0, 48
; RV64I-NEXT:    mul a1, a1, a2
; RV64I-NEXT:    slli a1, a1, 48
; RV64I-NEXT:    srai a1, a1, 48
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lui a1, 8
; RV64I-NEXT:    addiw a1, a1, -1
; RV64I-NEXT:    bge a0, a1, .LBB2_3
; RV64I-NEXT:  # %bb.1:
; RV64I-NEXT:    lui a1, 1048568
; RV64I-NEXT:    bge a1, a0, .LBB2_4
; RV64I-NEXT:  .LBB2_2:
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB2_3:
; RV64I-NEXT:    mv a0, a1
; RV64I-NEXT:    lui a1, 1048568
; RV64I-NEXT:    blt a1, a0, .LBB2_2
; RV64I-NEXT:  .LBB2_4:
; RV64I-NEXT:    lui a0, 1048568
; RV64I-NEXT:    ret
;
; RV32IZbb-LABEL: func16:
; RV32IZbb:       # %bb.0:
; RV32IZbb-NEXT:    sext.h a0, a0
; RV32IZbb-NEXT:    mul a1, a1, a2
; RV32IZbb-NEXT:    sext.h a1, a1
; RV32IZbb-NEXT:    add a0, a0, a1
; RV32IZbb-NEXT:    lui a1, 8
; RV32IZbb-NEXT:    addi a1, a1, -1
; RV32IZbb-NEXT:    min a0, a0, a1
; RV32IZbb-NEXT:    lui a1, 1048568
; RV32IZbb-NEXT:    max a0, a0, a1
; RV32IZbb-NEXT:    ret
;
; RV64IZbb-LABEL: func16:
; RV64IZbb:       # %bb.0:
; RV64IZbb-NEXT:    sext.h a0, a0
; RV64IZbb-NEXT:    mul a1, a1, a2
; RV64IZbb-NEXT:    sext.h a1, a1
; RV64IZbb-NEXT:    add a0, a0, a1
; RV64IZbb-NEXT:    lui a1, 8
; RV64IZbb-NEXT:    addiw a1, a1, -1
; RV64IZbb-NEXT:    min a0, a0, a1
; RV64IZbb-NEXT:    lui a1, 1048568
; RV64IZbb-NEXT:    max a0, a0, a1
; RV64IZbb-NEXT:    ret
  %a = mul i16 %y, %z
  %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a)
  ret i16 %tmp
}

define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
; RV32I-LABEL: func8:
; RV32I:       # %bb.0:
; RV32I-NEXT:    slli a0, a0, 24
; RV32I-NEXT:    srai a0, a0, 24
; RV32I-NEXT:    mul a1, a1, a2
; RV32I-NEXT:    slli a1, a1, 24
; RV32I-NEXT:    srai a1, a1, 24
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    addi a1, zero, 127
; RV32I-NEXT:    bge a0, a1, .LBB3_3
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    addi a1, zero, -128
; RV32I-NEXT:    bge a1, a0, .LBB3_4
; RV32I-NEXT:  .LBB3_2:
; RV32I-NEXT:    ret
; RV32I-NEXT:  .LBB3_3:
; RV32I-NEXT:    addi a0, zero, 127
; RV32I-NEXT:    addi a1, zero, -128
; RV32I-NEXT:    blt a1, a0, .LBB3_2
; RV32I-NEXT:  .LBB3_4:
; RV32I-NEXT:    addi a0, zero, -128
; RV32I-NEXT:    ret
;
; RV64I-LABEL: func8:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 56
; RV64I-NEXT:    srai a0, a0, 56
; RV64I-NEXT:    mul a1, a1, a2
; RV64I-NEXT:    slli a1, a1, 56
; RV64I-NEXT:    srai a1, a1, 56
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    addi a1, zero, 127
; RV64I-NEXT:    bge a0, a1, .LBB3_3
; RV64I-NEXT:  # %bb.1:
; RV64I-NEXT:    addi a1, zero, -128
; RV64I-NEXT:    bge a1, a0, .LBB3_4
; RV64I-NEXT:  .LBB3_2:
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB3_3:
; RV64I-NEXT:    addi a0, zero, 127
; RV64I-NEXT:    addi a1, zero, -128
; RV64I-NEXT:    blt a1, a0, .LBB3_2
; RV64I-NEXT:  .LBB3_4:
; RV64I-NEXT:    addi a0, zero, -128
; RV64I-NEXT:    ret
;
; RV32IZbb-LABEL: func8:
; RV32IZbb:       # %bb.0:
; RV32IZbb-NEXT:    sext.b a0, a0
; RV32IZbb-NEXT:    mul a1, a1, a2
; RV32IZbb-NEXT:    sext.b a1, a1
; RV32IZbb-NEXT:    add a0, a0, a1
; RV32IZbb-NEXT:    addi a1, zero, 127
; RV32IZbb-NEXT:    min a0, a0, a1
; RV32IZbb-NEXT:    addi a1, zero, -128
; RV32IZbb-NEXT:    max a0, a0, a1
; RV32IZbb-NEXT:    ret
;
; RV64IZbb-LABEL: func8:
; RV64IZbb:       # %bb.0:
; RV64IZbb-NEXT:    sext.b a0, a0
; RV64IZbb-NEXT:    mul a1, a1, a2
; RV64IZbb-NEXT:    sext.b a1, a1
; RV64IZbb-NEXT:    add a0, a0, a1
; RV64IZbb-NEXT:    addi a1, zero, 127
; RV64IZbb-NEXT:    min a0, a0, a1
; RV64IZbb-NEXT:    addi a1, zero, -128
; RV64IZbb-NEXT:    max a0, a0, a1
; RV64IZbb-NEXT:    ret
  %a = mul i8 %y, %z
  %tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %a)
  ret i8 %tmp
}

define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
; RV32I-LABEL: func4:
; RV32I:       # %bb.0:
; RV32I-NEXT:    slli a0, a0, 28
; RV32I-NEXT:    srai a0, a0, 28
; RV32I-NEXT:    mul a1, a1, a2
; RV32I-NEXT:    slli a1, a1, 28
; RV32I-NEXT:    srai a1, a1, 28
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    addi a1, zero, 7
; RV32I-NEXT:    bge a0, a1, .LBB4_3
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    addi a1, zero, -8
; RV32I-NEXT:    bge a1, a0, .LBB4_4
; RV32I-NEXT:  .LBB4_2:
; RV32I-NEXT:    ret
; RV32I-NEXT:  .LBB4_3:
; RV32I-NEXT:    addi a0, zero, 7
; RV32I-NEXT:    addi a1, zero, -8
; RV32I-NEXT:    blt a1, a0, .LBB4_2
; RV32I-NEXT:  .LBB4_4:
; RV32I-NEXT:    addi a0, zero, -8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: func4:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 60
; RV64I-NEXT:    srai a0, a0, 60
; RV64I-NEXT:    mul a1, a1, a2
; RV64I-NEXT:    slli a1, a1, 60
; RV64I-NEXT:    srai a1, a1, 60
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    addi a1, zero, 7
; RV64I-NEXT:    bge a0, a1, .LBB4_3
; RV64I-NEXT:  # %bb.1:
; RV64I-NEXT:    addi a1, zero, -8
; RV64I-NEXT:    bge a1, a0, .LBB4_4
; RV64I-NEXT:  .LBB4_2:
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB4_3:
; RV64I-NEXT:    addi a0, zero, 7
; RV64I-NEXT:    addi a1, zero, -8
; RV64I-NEXT:    blt a1, a0, .LBB4_2
; RV64I-NEXT:  .LBB4_4:
; RV64I-NEXT:    addi a0, zero, -8
; RV64I-NEXT:    ret
;
; RV32IZbb-LABEL: func4:
; RV32IZbb:       # %bb.0:
; RV32IZbb-NEXT:    slli a0, a0, 28
; RV32IZbb-NEXT:    srai a0, a0, 28
; RV32IZbb-NEXT:    mul a1, a1, a2
; RV32IZbb-NEXT:    slli a1, a1, 28
; RV32IZbb-NEXT:    srai a1, a1, 28
; RV32IZbb-NEXT:    add a0, a0, a1
; RV32IZbb-NEXT:    addi a1, zero, 7
; RV32IZbb-NEXT:    min a0, a0, a1
; RV32IZbb-NEXT:    addi a1, zero, -8
; RV32IZbb-NEXT:    max a0, a0, a1
; RV32IZbb-NEXT:    ret
;
; RV64IZbb-LABEL: func4:
; RV64IZbb:       # %bb.0:
; RV64IZbb-NEXT:    slli a0, a0, 60
; RV64IZbb-NEXT:    srai a0, a0, 60
; RV64IZbb-NEXT:    mul a1, a1, a2
; RV64IZbb-NEXT:    slli a1, a1, 60
; RV64IZbb-NEXT:    srai a1, a1, 60
; RV64IZbb-NEXT:    add a0, a0, a1
; RV64IZbb-NEXT:    addi a1, zero, 7
; RV64IZbb-NEXT:    min a0, a0, a1
; RV64IZbb-NEXT:    addi a1, zero, -8
; RV64IZbb-NEXT:    max a0, a0, a1
; RV64IZbb-NEXT:    ret
  %a = mul i4 %y, %z
  %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %a)
  ret i4 %tmp
}