//===- TosaToLinalg.cpp - Lowering Tosa to Linalg Dialect -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// These rewriters lower from the Tosa to the Linalg dialect.
//
//===----------------------------------------------------------------------===//

#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

#include <numeric>

using namespace mlir;

static SmallVector<StringRef> getNParallelLoopsAttrs(unsigned nParallelLoops) {
  return SmallVector<StringRef>(nParallelLoops, getParallelIteratorTypeName());
}

template <typename T>
static mlir::ConstantOp
createConstFromIntAttribute(Operation *op, std::string attrName,
                            Type requiredAttrType, OpBuilder &rewriter) {
  auto castedN = static_cast<T>(
      op->getAttr(attrName).cast<IntegerAttr>().getValue().getSExtValue());
  return rewriter.create<mlir::ConstantOp>(
      op->getLoc(), IntegerAttr::get(requiredAttrType, castedN));
}

template <typename T>
static void getValuesFromIntArrayAttribute(ArrayAttr attr,
                                           SmallVector<T> &arrayValues) {
  for (Attribute val : attr.getValue()) {
    arrayValues.push_back(val.cast<IntegerAttr>().getValue().getSExtValue());
  }
}

template <typename T, typename P>
static mlir::SelectOp clampHelper(Location loc, Value arg,
                                  mlir::ConstantOp min, mlir::ConstantOp max,
                                  P pred, OpBuilder &rewriter) {
  auto smallerThanMin = rewriter.create<T>(loc, pred, arg, min);
  auto minOrArg =
      rewriter.create<mlir::SelectOp>(loc, smallerThanMin, min, arg);
  auto largerThanMax = rewriter.create<T>(loc, pred, max, arg);
  return rewriter.create<mlir::SelectOp>(loc, largerThanMax, max, minOrArg);
}

static mlir::Value applyPad(Location loc, Value input, ArrayRef<int64_t> pad,
                            Attribute padAttr, OpBuilder &rewriter) {
  // Input should be padded if necessary.
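  // Illustrative walkthrough (example values, not taken from the original
  // source): for an input of type tensor<4x4xf32> with pad = [1, 1, 2, 2],
  // dimension i gets low padding pad[2 * i] and high padding pad[2 * i + 1],
  // so the padded result type is tensor<6x8xf32>, filled with `padAttr`
  // outside the original data.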
  if (llvm::all_of(pad, [](int64_t p) { return p == 0; }))
    return input;

  ShapedType inputTy = input.getType().cast<ShapedType>();
  Type inputETy = inputTy.getElementType();
  auto inputShape = inputTy.getShape();

  assert((inputShape.size() * 2) == pad.size());

  SmallVector<int64_t> paddedShape;
  SmallVector<OpFoldResult> lowIndices;
  SmallVector<OpFoldResult> highIndices;
  for (int i = 0, s = inputShape.size(); i < s; i++) {
    auto lowPad = pad[i * 2];
    auto highPad = pad[i * 2 + 1];
    paddedShape.push_back(inputShape[i] + highPad + lowPad);
    lowIndices.push_back(rewriter.getIndexAttr(lowPad));
    highIndices.push_back(rewriter.getIndexAttr(highPad));
  }

  Value padValue = rewriter.create<ConstantOp>(loc, padAttr);

  return linalg::PadTensorOp::createPadScalarOp(
             RankedTensorType::get(paddedShape, inputETy), input, padValue,
             lowIndices, highIndices, loc, rewriter)
      .result();
}

static Value
createLinalgBodyCalculationForElementwiseOp(Operation *op, ValueRange args,
                                            ArrayRef<Type> resultTypes,
                                            PatternRewriter &rewriter) {
  Location loc = op->getLoc();
  auto elementTy =
      op->getOperand(0).getType().cast<ShapedType>().getElementType();

  // tosa::AbsOp
  if (isa<tosa::AbsOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::AbsFOp>(loc, resultTypes, args);

  if (isa<tosa::AbsOp>(op) && elementTy.isa<IntegerType>()) {
    auto zero =
        rewriter.create<mlir::ConstantOp>(loc, rewriter.getZeroAttr(elementTy));
    auto cmp =
        rewriter.create<mlir::CmpIOp>(loc, CmpIPredicate::sgt, args[0], zero);
    auto neg = rewriter.create<mlir::SubIOp>(loc, zero, args[0]);
    return rewriter.create<mlir::SelectOp>(loc, cmp, args[0], neg);
  }

  // tosa::AddOp
  if (isa<tosa::AddOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::AddFOp>(loc, resultTypes, args);

  if (isa<tosa::AddOp>(op) && elementTy.isa<IntegerType>())
    return rewriter.create<mlir::AddIOp>(loc, resultTypes, args);

  // tosa::SubOp
  if (isa<tosa::SubOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::SubFOp>(loc, resultTypes, args);

  if (isa<tosa::SubOp>(op) && elementTy.isa<IntegerType>())
    return rewriter.create<mlir::SubIOp>(loc, resultTypes, args);

  // tosa::MulOp
  if (isa<tosa::MulOp>(op) && elementTy.isa<FloatType>()) {
    if (dyn_cast<tosa::MulOp>(op).shift() != 0) {
      (void)rewriter.notifyMatchFailure(op,
                                        "Cannot have shift value for float");
      return nullptr;
    }
    return rewriter.create<mlir::MulFOp>(loc, resultTypes, args);
  }

  // tosa::DivOp
  if (isa<tosa::DivOp>(op) && elementTy.isa<IntegerType>())
    return rewriter.create<mlir::SignedDivIOp>(loc, resultTypes, args);

  // tosa::ReciprocalOp
  if (isa<tosa::ReciprocalOp>(op) && elementTy.isa<FloatType>()) {
    auto one =
        rewriter.create<mlir::ConstantOp>(loc, FloatAttr::get(elementTy, 1));
    return rewriter.create<mlir::DivFOp>(loc, resultTypes, one, args[0]);
  }

  if (isa<tosa::MulOp>(op) && elementTy.isa<IntegerType>()) {
    Value a = args[0];
    Value b = args[1];
    auto shift =
        op->getAttr("shift").cast<IntegerAttr>().getValue().getSExtValue();
    if (shift > 0) {
      auto shiftConst =
          rewriter.create<ConstantIntOp>(loc, shift, /*bitwidth=*/8);
      if (!a.getType().isInteger(32))
        a = rewriter.create<SignExtendIOp>(loc, rewriter.getI32Type(), a);

      if (!b.getType().isInteger(32))
        b = rewriter.create<SignExtendIOp>(loc, rewriter.getI32Type(), b);

      auto result = rewriter.create<tosa::ApplyScaleOp>(
          loc, rewriter.getI32Type(), a, b, shiftConst,
          rewriter.getBoolAttr(false));

      if (elementTy.isInteger(32))
        return result;

      return rewriter.create<TruncateIOp>(loc, elementTy, result);
    }

    int aWidth = a.getType().getIntOrFloatBitWidth();
    int bWidth = b.getType().getIntOrFloatBitWidth();
    int cWidth = resultTypes[0].getIntOrFloatBitWidth();

    if (aWidth < cWidth)
      a = rewriter.create<SignExtendIOp>(loc, resultTypes[0], a);
    if (bWidth < cWidth)
      b = rewriter.create<SignExtendIOp>(loc, resultTypes[0], b);

    return rewriter.create<mlir::MulIOp>(loc, resultTypes, a, b);
  }

  // tosa::NegateOp
  if (isa<tosa::NegateOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::NegFOp>(loc, resultTypes, args);

  if (isa<tosa::NegateOp>(op) && elementTy.isa<IntegerType>() &&
      !cast<tosa::NegateOp>(op).quantization_info()) {
    auto constant =
        rewriter.create<ConstantOp>(loc, IntegerAttr::get(elementTy, 0));
    return rewriter.create<SubIOp>(loc, resultTypes, constant, args[0]);
  }

  if (isa<tosa::NegateOp>(op) && elementTy.isa<IntegerType>() &&
      cast<tosa::NegateOp>(op).quantization_info()) {
    auto quantizationInfo = cast<tosa::NegateOp>(op).quantization_info();
    int32_t inputBitWidth = elementTy.getIntOrFloatBitWidth();
    int64_t inZp =
        quantizationInfo.getValue().input_zp().getValue().getSExtValue();
    int64_t outZp =
        quantizationInfo.getValue().output_zp().getValue().getSExtValue();

    // Compute the maximum value that can occur in the intermediate buffer.
    int64_t zpAdd = inZp + outZp;
    int64_t maxValue = APInt::getSignedMaxValue(inputBitWidth).getSExtValue() +
                       std::abs(zpAdd) + 1;

    // Convert that maximum value into the maximum bitwidth needed to represent
    // it. We assume 48-bit numbers may be supported further in the pipeline.
    int intermediateBitWidth = 64;
    if (maxValue <= APInt::getSignedMaxValue(16).getSExtValue()) {
      intermediateBitWidth = 16;
    } else if (maxValue <= APInt::getSignedMaxValue(32).getSExtValue()) {
      intermediateBitWidth = 32;
    } else if (maxValue <= APInt::getSignedMaxValue(48).getSExtValue()) {
      intermediateBitWidth = 48;
    }

    Type intermediateType = rewriter.getIntegerType(intermediateBitWidth);
    Value zpAddValue = rewriter.create<ConstantOp>(
        loc, rewriter.getIntegerAttr(intermediateType, zpAdd));

    // The negation can be applied by doing:
    //  outputValue = inZp + outZp - inputValue
    auto ext = rewriter.create<SignExtendIOp>(loc, intermediateType, args[0]);
    auto sub = rewriter.create<SubIOp>(loc, zpAddValue, ext);

    // Clamp to the negation range.
    auto min = rewriter.create<ConstantOp>(
        loc, rewriter.getIntegerAttr(
                 intermediateType,
                 APInt::getSignedMinValue(inputBitWidth).getSExtValue()));
    auto max = rewriter.create<ConstantOp>(
        loc, rewriter.getIntegerAttr(
                 intermediateType,
                 APInt::getSignedMaxValue(inputBitWidth).getSExtValue()));
    auto clamp = clampHelper<mlir::CmpIOp>(loc, sub, min, max,
                                           CmpIPredicate::slt, rewriter);

    // Truncate to the final value.
    return rewriter.create<TruncateIOp>(loc, elementTy, clamp);
  }

  // tosa::BitwiseAndOp
  if (isa<tosa::BitwiseAndOp>(op) && elementTy.isa<IntegerType>())
    return rewriter.create<mlir::AndOp>(loc, resultTypes, args);

  // tosa::BitwiseOrOp
  if (isa<tosa::BitwiseOrOp>(op) && elementTy.isa<IntegerType>())
    return rewriter.create<mlir::OrOp>(loc, resultTypes, args);

  // tosa::BitwiseNotOp
  if (isa<tosa::BitwiseNotOp>(op) && elementTy.isa<IntegerType>()) {
    auto allOnesAttr = rewriter.getIntegerAttr(
        elementTy, APInt::getAllOnesValue(elementTy.getIntOrFloatBitWidth()));
    auto allOnes = rewriter.create<ConstantOp>(loc, allOnesAttr);
    return rewriter.create<mlir::XOrOp>(loc, resultTypes, args[0], allOnes);
  }

  // tosa::BitwiseXOrOp
  if (isa<tosa::BitwiseXOrOp>(op) && elementTy.isa<IntegerType>())
    return rewriter.create<mlir::XOrOp>(loc, resultTypes, args);

  // tosa::LogicalLeftShiftOp
  if (isa<tosa::LogicalLeftShiftOp>(op) && elementTy.isa<IntegerType>())
    return rewriter.create<mlir::ShiftLeftOp>(loc, resultTypes, args);

  // tosa::LogicalRightShiftOp
  if (isa<tosa::LogicalRightShiftOp>(op) && elementTy.isa<IntegerType>())
    return rewriter.create<mlir::UnsignedShiftRightOp>(loc, resultTypes, args);

  // tosa::ArithmeticRightShiftOp
  if (isa<tosa::ArithmeticRightShiftOp>(op) && elementTy.isa<IntegerType>()) {
    auto result =
        rewriter.create<mlir::SignedShiftRightOp>(loc, resultTypes, args);
    auto round = op->getAttr("round").cast<BoolAttr>().getValue();
    if (!round) {
      return result;
    }

    Type i1Ty = IntegerType::get(rewriter.getContext(), /*width=*/1);
    auto one =
        rewriter.create<mlir::ConstantOp>(loc, IntegerAttr::get(elementTy, 1));
    auto zero =
        rewriter.create<mlir::ConstantOp>(loc, IntegerAttr::get(elementTy, 0));
    auto i1one =
        rewriter.create<mlir::ConstantOp>(loc, IntegerAttr::get(i1Ty, 1));

    // Checking that input2 != 0
    auto shiftValueGreaterThanZero =
        rewriter.create<mlir::CmpIOp>(loc, CmpIPredicate::sgt, args[1], zero);

    // Checking for the last bit of input1 to be 1
    auto subtract =
        rewriter.create<mlir::SubIOp>(loc, resultTypes, args[1], one);
    auto shifted = rewriter
                       .create<mlir::SignedShiftRightOp>(loc, resultTypes,
                                                         args[0], subtract)
                       ->getResults();
    auto truncated =
        rewriter.create<mlir::TruncateIOp>(loc, i1Ty, shifted, mlir::None);
    auto isInputOdd =
        rewriter.create<mlir::AndOp>(loc, i1Ty, truncated, i1one);

    auto shouldRound = rewriter.create<mlir::AndOp>(
        loc, i1Ty, shiftValueGreaterThanZero, isInputOdd);
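    // Worked example (illustrative): shifting 5 (0b101) right by 1 with
    // round = true discards a set bit, so shouldRound is true and 1 is added
    // to the shifted value, producing 3 instead of 2.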
    auto extended =
        rewriter.create<mlir::ZeroExtendIOp>(loc, resultTypes, shouldRound);
    return rewriter.create<mlir::AddIOp>(loc, resultTypes, result, extended);
  }

  // tosa::LogicalAnd
  if (isa<tosa::LogicalAndOp>(op) && elementTy.isInteger(1))
    return rewriter.create<mlir::AndOp>(loc, resultTypes, args);

  // tosa::LogicalNot
  if (isa<tosa::LogicalNotOp>(op) && elementTy.isInteger(1)) {
    auto one = rewriter.create<mlir::ConstantOp>(
        loc, rewriter.getIntegerAttr(elementTy, 1));
    return rewriter.create<mlir::XOrOp>(loc, resultTypes, args[0], one);
  }

  // tosa::LogicalOr
  if (isa<tosa::LogicalOrOp>(op) && elementTy.isInteger(1))
    return rewriter.create<mlir::OrOp>(loc, resultTypes, args);

  // tosa::LogicalXor
  if (isa<tosa::LogicalXorOp>(op) && elementTy.isInteger(1))
    return rewriter.create<mlir::XOrOp>(loc, resultTypes, args);

  // tosa::PowOp
  if (isa<tosa::PowOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::math::PowFOp>(loc, resultTypes, args);

  // tosa::RsqrtOp
  if (isa<tosa::RsqrtOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::math::RsqrtOp>(loc, resultTypes, args);

  // tosa::LogOp
  if (isa<tosa::LogOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::math::LogOp>(loc, resultTypes, args);

  // tosa::ExpOp
  if (isa<tosa::ExpOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::math::ExpOp>(loc, resultTypes, args);

  // tosa::TanhOp
  if (isa<tosa::TanhOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::math::TanhOp>(loc, resultTypes, args);

  // tosa::GreaterOp
  if (isa<tosa::GreaterOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::CmpFOp>(loc, CmpFPredicate::OGT, args[0],
                                         args[1]);

  if (isa<tosa::GreaterOp>(op) && elementTy.isSignlessInteger())
    return rewriter.create<mlir::CmpIOp>(loc, CmpIPredicate::sgt, args[0],
                                         args[1]);

  // tosa::GreaterEqualOp
  if (isa<tosa::GreaterEqualOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::CmpFOp>(loc, CmpFPredicate::OGE, args[0],
                                         args[1]);

  if (isa<tosa::GreaterEqualOp>(op) && elementTy.isSignlessInteger())
    return rewriter.create<mlir::CmpIOp>(loc, CmpIPredicate::sge, args[0],
                                         args[1]);

  // tosa::EqualOp
  if (isa<tosa::EqualOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::CmpFOp>(loc, CmpFPredicate::OEQ, args[0],
                                         args[1]);

  if (isa<tosa::EqualOp>(op) && elementTy.isSignlessInteger())
    return rewriter.create<mlir::CmpIOp>(loc, CmpIPredicate::eq, args[0],
                                         args[1]);

  // tosa::SelectOp
  if (isa<tosa::SelectOp>(op)) {
    elementTy =
        op->getOperand(1).getType().cast<ShapedType>().getElementType();
    if (elementTy.isa<FloatType>() || elementTy.isa<IntegerType>())
      return rewriter.create<mlir::SelectOp>(loc, args[0], args[1], args[2]);
  }

  // tosa::MaximumOp
  if (isa<tosa::MaximumOp>(op) && elementTy.isa<FloatType>()) {
    auto predicate = rewriter.create<mlir::CmpFOp>(loc, CmpFPredicate::OGT,
                                                   args[0], args[1]);
    return rewriter.create<mlir::SelectOp>(loc, predicate, args[0], args[1]);
  }

  if (isa<tosa::MaximumOp>(op) && elementTy.isSignlessInteger()) {
    auto predicate = rewriter.create<mlir::CmpIOp>(loc, CmpIPredicate::sgt,
                                                   args[0], args[1]);
    return rewriter.create<mlir::SelectOp>(loc, predicate, args[0], args[1]);
  }

  // tosa::MinimumOp
  if (isa<tosa::MinimumOp>(op) && elementTy.isa<FloatType>()) {
    auto predicate = rewriter.create<mlir::CmpFOp>(loc, CmpFPredicate::OLT,
                                                   args[0], args[1]);
    return rewriter.create<mlir::SelectOp>(loc, predicate, args[0], args[1]);
  }

  if (isa<tosa::MinimumOp>(op) && elementTy.isSignlessInteger()) {
    auto predicate = rewriter.create<mlir::CmpIOp>(loc, CmpIPredicate::slt,
                                                   args[0], args[1]);
    return rewriter.create<mlir::SelectOp>(loc, predicate, args[0], args[1]);
  }

  // tosa::CeilOp
  if (isa<tosa::CeilOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::CeilFOp>(loc, resultTypes, args);

  // tosa::FloorOp
  if (isa<tosa::FloorOp>(op) && elementTy.isa<FloatType>())
    return rewriter.create<mlir::FloorFOp>(loc, resultTypes, args);

  // tosa::ClampOp
  if (isa<tosa::ClampOp>(op) && elementTy.isa<FloatType>()) {
    auto min = rewriter.create<mlir::ConstantOp>(loc, elementTy,
                                                 op->getAttr("min_fp"));
    auto max = rewriter.create<mlir::ConstantOp>(loc, elementTy,
                                                 op->getAttr("max_fp"));
    return clampHelper<mlir::CmpFOp>(loc, args[0], min, max,
                                     CmpFPredicate::OLT, rewriter);
  }

  if (isa<tosa::ClampOp>(op) && elementTy.isa<IntegerType>()) {
    auto min = createConstFromIntAttribute<int32_t>(op, "min_int", elementTy,
                                                    rewriter);
    auto max = createConstFromIntAttribute<int32_t>(op, "max_int", elementTy,
                                                    rewriter);
    return clampHelper<mlir::CmpIOp>(loc, args[0], min, max,
                                     CmpIPredicate::slt, rewriter);
  }

  // tosa::ReluNOp
  if (isa<tosa::ReluNOp>(op) && elementTy.isa<FloatType>()) {
    auto zero =
rewriter.create(loc, FloatAttr::get(elementTy, 0)); auto n = rewriter.create(loc, elementTy, op->getAttr("max_fp")); return clampHelper(loc, args[0], zero, n, CmpFPredicate::OLT, rewriter); } if (isa(op) && elementTy.isa()) { auto zero = rewriter.create(loc, IntegerAttr::get(elementTy, 0)); auto n = createConstFromIntAttribute(op, "max_int", elementTy, rewriter); return clampHelper(loc, args[0], zero, n, CmpIPredicate::slt, rewriter); } // tosa::SigmoidOp if (isa(op) && elementTy.isa()) { auto one = rewriter.create(loc, FloatAttr::get(elementTy, 1)); auto negate = rewriter.create(loc, resultTypes, args[0]); auto exp = rewriter.create(loc, resultTypes, negate); auto added = rewriter.create(loc, resultTypes, exp, one); return rewriter.create(loc, resultTypes, one, added); } // tosa::CastOp if (isa(op)) { Type srcTy = elementTy; Type dstTy = resultTypes.front(); bool bitExtend = srcTy.getIntOrFloatBitWidth() < dstTy.getIntOrFloatBitWidth(); if (srcTy == dstTy) return args.front(); if (srcTy.isa() && dstTy.isa() && bitExtend) return rewriter.create(loc, resultTypes, args, mlir::None); if (srcTy.isa() && dstTy.isa() && !bitExtend) return rewriter.create(loc, resultTypes, args, mlir::None); // 1-bit integers need to be treated as signless. if (srcTy.isInteger(1) && mlir::UIToFPOp::areCastCompatible(srcTy, dstTy)) return rewriter.create(loc, resultTypes, args, mlir::None); if (srcTy.isInteger(1) && dstTy.isa() && bitExtend) return rewriter.create(loc, resultTypes, args, mlir::None); // All other si-to-fp conversions should be handled by SIToFP. if (mlir::SIToFPOp::areCastCompatible(srcTy, dstTy)) return rewriter.create(loc, resultTypes, args, mlir::None); // Casting to boolean, floats need to only be checked as not-equal to zero. if (srcTy.isa() && dstTy.isInteger(1)) { Value zero = rewriter.create(loc, rewriter.getFloatAttr(srcTy, 0.0)); return rewriter.create(loc, CmpFPredicate::UNE, args.front(), zero); } if (mlir::FPToSIOp::areCastCompatible(srcTy, dstTy)) { auto zero = rewriter.create(loc, rewriter.getF32FloatAttr(0.0f)); auto half = rewriter.create(loc, rewriter.getF32FloatAttr(0.5f)); auto intMin = rewriter.create( loc, rewriter.getF32FloatAttr( APInt::getSignedMinValue(dstTy.getIntOrFloatBitWidth()) .getSExtValue())); auto intMax = rewriter.create( loc, rewriter.getF32FloatAttr( APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth()) .getSExtValue())); auto added = rewriter.create(loc, args[0], half); auto subbed = rewriter.create(loc, args[0], half); auto negative = rewriter.create(loc, CmpFPredicate::OLT, args[0], zero); auto rounded = rewriter.create(loc, negative, subbed, added); auto clamped = clampHelper(loc, rounded, intMin, intMax, CmpFPredicate::OLT, rewriter); return rewriter.create(loc, dstTy, clamped); } // Casting to boolean, integers need to only be checked as not-equal to // zero. 
if (srcTy.isa() && dstTy.isInteger(1)) { Value zero = rewriter.create(loc, 0, srcTy.getIntOrFloatBitWidth()); return rewriter.create(loc, CmpIPredicate::ne, args.front(), zero); } if (srcTy.isa() && dstTy.isa() && bitExtend) return rewriter.create(loc, resultTypes, args, mlir::None); if (srcTy.isa() && dstTy.isa() && !bitExtend) { auto intMin = rewriter.create( loc, APInt::getSignedMinValue(dstTy.getIntOrFloatBitWidth()) .getSExtValue(), srcTy.getIntOrFloatBitWidth()); auto intMax = rewriter.create( loc, APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth()) .getSExtValue(), srcTy.getIntOrFloatBitWidth()); auto clamped = clampHelper(loc, args[0], intMin, intMax, CmpIPredicate::slt, rewriter); return rewriter.create(loc, dstTy, clamped); } } (void)rewriter.notifyMatchFailure( op, "unhandled op for linalg body calculation for elementwise op"); return nullptr; } static LogicalResult elementwiseMatchAndRewriteHelper(Operation *operation, PatternRewriter &rewriter) { auto loc = operation->getLoc(); assert(operation->getNumResults() == 1 && "All TOSA elementwise ops should only return a single result."); auto results = operation->getResults(); auto resultTy = operation->getResult(0).getType().dyn_cast(); if (!resultTy) return rewriter.notifyMatchFailure(operation, "All results must be a shaped type"); unsigned rank = resultTy.getRank(); // Construct the indexing maps needed for linalg.generic ops. SmallVector bodyArgTypes; for (Value in : operation->getOperands()) bodyArgTypes.emplace_back(getElementTypeOrSelf(in.getType())); SmallVector opResultTypes; SmallVector initTensors; for (auto result : results) { auto resultTy = result.getType().template cast(); if (!resultTy.hasStaticShape()) return rewriter.notifyMatchFailure( operation, "tosa to linalg conversion expects statically shaped tensors"); initTensors.push_back(rewriter.create( loc, ArrayRef({}), resultTy.getShape(), resultTy.getElementType())); opResultTypes.push_back(result.getType()); } auto bodyResultTypes = llvm::to_vector<4>(llvm::map_range( initTensors, [](Value v) { return getElementTypeOrSelf(v); })); SmallVector operands; SmallVector indexingMaps; indexingMaps.reserve(operation->getNumOperands() + bodyResultTypes.size()); // Input indexing maps may be broadcasted. 
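  // Illustrative example: adding a tensor<1x5xf32> operand into a
  // tensor<4x5xf32> result drops the size-1 dimension below, reshaping the
  // operand to tensor<5xf32> and reading it through
  // affine_map<(d0, d1) -> (d1)>, while same-shaped operands and all results
  // use the identity map.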
for (Value operand : operation->getOperands()) { ShapedType type = operand.getType().cast(); if (type.getShape() == resultTy.getShape()) { operands.push_back(operand); indexingMaps.push_back(rewriter.getMultiDimIdentityMap(rank)); continue; } SmallVector newShape; SmallVector affineExprs; newShape.reserve(type.getRank()); for (auto it : llvm::enumerate(type.getShape())) { if (it.value() == resultTy.getDimSize(it.index())) { newShape.push_back(it.value()); affineExprs.push_back( mlir::getAffineDimExpr(it.index(), rewriter.getContext())); } } if (newShape.size() != rank) { operand = rewriter.create( loc, RankedTensorType::get(newShape, type.getElementType()), operand); } operands.push_back(operand); indexingMaps.push_back(AffineMap::get( /*dimCount=*/type.getRank(), /*symbolCount=*/0, affineExprs, rewriter.getContext())); } indexingMaps.append(operation->getNumResults(), rewriter.getMultiDimIdentityMap(rank)); bool didEncounterError = false; auto linalgOp = rewriter.create( loc, opResultTypes, operands, initTensors, indexingMaps, getNParallelLoopsAttrs(rank), [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange blockArgs) { Value opResult = createLinalgBodyCalculationForElementwiseOp( operation, blockArgs.take_front(operation->getNumOperands()), bodyResultTypes, rewriter); if (!opResult) { didEncounterError = true; return; } nestedBuilder.create(loc, opResult); }); if (didEncounterError) return failure(); rewriter.replaceOp(operation, linalgOp->getResults()); return success(); } // Returns the constant initial value for a given reduction operation. The // attribute type varies depending on the element type required. static Attribute createInitialValueForReduceOp(Operation *op, Type elementTy, PatternRewriter &rewriter) { if (isa(op) && elementTy.isa()) return rewriter.getFloatAttr(elementTy, 0.0); if (isa(op) && elementTy.isa()) return rewriter.getIntegerAttr(elementTy, 0); if (isa(op) && elementTy.isa()) return rewriter.getFloatAttr(elementTy, 1.0); if (isa(op) && elementTy.isa()) return rewriter.getIntegerAttr(elementTy, 1); if (isa(op) && elementTy.isa()) return rewriter.getFloatAttr( elementTy, APFloat::getLargest( elementTy.cast().getFloatSemantics(), false)); if (isa(op) && elementTy.isa()) return rewriter.getIntegerAttr( elementTy, APInt::getSignedMaxValue(elementTy.getIntOrFloatBitWidth())); if (isa(op) && elementTy.isa()) return rewriter.getFloatAttr( elementTy, APFloat::getLargest( elementTy.cast().getFloatSemantics(), true)); if (isa(op) && elementTy.isa()) return rewriter.getIntegerAttr( elementTy, APInt::getSignedMinValue(elementTy.getIntOrFloatBitWidth())); if (isa(op) && elementTy.isInteger(1)) return rewriter.getIntegerAttr(elementTy, APInt::getAllOnesValue(1)); if (isa(op) && elementTy.isInteger(1)) return rewriter.getIntegerAttr(elementTy, APInt::getNullValue(1)); if (isa(op) && elementTy.isa()) return rewriter.getFloatAttr( elementTy, APFloat::getLargest( elementTy.cast().getFloatSemantics(), true)); if (isa(op) && elementTy.isa()) return rewriter.getIntegerAttr( elementTy, APInt::getSignedMinValue(elementTy.getIntOrFloatBitWidth())); return {}; } // Creates the body calculation for a reduction. The operations vary depending // on the input type. 
static Value createLinalgBodyCalculationForReduceOp(Operation *op, ValueRange args, Type elementTy, PatternRewriter &rewriter) { Location loc = op->getLoc(); if (isa(op) && elementTy.isa()) { return rewriter.create(loc, args); } if (isa(op) && elementTy.isa()) { return rewriter.create(loc, args); } if (isa(op) && elementTy.isa()) { return rewriter.create(loc, args); } if (isa(op) && elementTy.isa()) { return rewriter.create(loc, args); } if (isa(op) && elementTy.isa()) { auto predicate = rewriter.create(loc, CmpFPredicate::OLT, args[0], args[1]); return rewriter.create(loc, predicate, args[0], args[1]); } if (isa(op) && elementTy.isa()) { auto predicate = rewriter.create(loc, CmpIPredicate::slt, args[0], args[1]); return rewriter.create(loc, predicate, args[0], args[1]); } if (isa(op) && elementTy.isa()) { auto predicate = rewriter.create(loc, CmpFPredicate::OGT, args[0], args[1]); return rewriter.create(loc, predicate, args[0], args[1]); } if (isa(op) && elementTy.isa()) { auto predicate = rewriter.create(loc, CmpIPredicate::sgt, args[0], args[1]); return rewriter.create(loc, predicate, args[0], args[1]); } if (isa(op) && elementTy.isInteger(1)) return rewriter.create(loc, args); if (isa(op) && elementTy.isInteger(1)) return rewriter.create(loc, args); return {}; } // Performs the match and rewrite for reduction operations. This includes // declaring a correctly sized initial value, and the linalg.generic operation // that reduces across the specified axis. static LogicalResult reduceMatchAndRewriteHelper(Operation *op, uint64_t axis, PatternRewriter &rewriter) { auto loc = op->getLoc(); auto inputTy = op->getOperand(0).getType().template cast(); auto resultTy = op->getResult(0).getType().template cast(); auto elementTy = resultTy.getElementType(); Value input = op->getOperand(0); llvm::SmallVector reduceShape; for (unsigned i = 0; i < inputTy.getRank(); i++) { if (axis != i) reduceShape.push_back(inputTy.getDimSize(i)); } Type reduceTy = RankedTensorType::get(reduceShape, resultTy.getElementType()); // First fill the output buffer with the init value. auto initTensor = rewriter .create(loc, ArrayRef({}), reduceShape, resultTy.getElementType()) .result(); auto fillValueAttr = createInitialValueForReduceOp(op, elementTy, rewriter); if (!fillValueAttr) return rewriter.notifyMatchFailure( op, "No initial value found for reduction operation"); auto fillValue = rewriter.create(loc, fillValueAttr); auto filledTensor = rewriter.create(loc, fillValue, initTensor).result(); SmallVector srcExprs; SmallVector dstExprs; SmallVector iteratorTypes; for (unsigned int i = 0, rank = inputTy.getRank(); i != rank; ++i) { srcExprs.push_back(mlir::getAffineDimExpr(i, rewriter.getContext())); iteratorTypes.push_back(axis == i ? 
getReductionIteratorTypeName() : getParallelIteratorTypeName()); if (axis != i) dstExprs.push_back(mlir::getAffineDimExpr(i, rewriter.getContext())); } bool didEncounterError = false; auto maps = AffineMap::inferFromExprList({srcExprs, dstExprs}); auto linalgOp = rewriter.create( loc, reduceTy, input, filledTensor, maps, iteratorTypes, [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange blockArgs) { auto result = createLinalgBodyCalculationForReduceOp( op, blockArgs, elementTy, rewriter); if (result) didEncounterError = true; nestedBuilder.create(loc, result); }); if (!didEncounterError) return failure(); rewriter.replaceOpWithNewOp(op, resultTy, linalgOp.getResults()); return success(); } static LogicalResult convolutionMatchAndRewriterHelper(Operation *op, ConversionPatternRewriter &rewriter) { Location loc = op->getLoc(); Value input = op->getOperand(0); Value weight = op->getOperand(1); Value bias = op->getOperand(2); ShapedType inputTy = input.getType().cast(); ShapedType weightTy = weight.getType().cast(); ShapedType biasTy = bias.getType().cast(); ShapedType resultTy = op->getResult(0).getType().cast(); Type inputETy = inputTy.getElementType(); Type resultETy = resultTy.getElementType(); auto padAttr = op->getAttr("pad").cast(); auto strideTosaAttr = op->getAttr("stride").cast(); auto dilationTosaAttr = op->getAttr("dilation").cast(); bool isQuantized = op->hasAttr("quantization_info"); IntegerAttr iZp; IntegerAttr kZp; if (isQuantized) { auto quantizationInfo = op->getAttr("quantization_info").cast(); iZp = rewriter.getI32IntegerAttr( quantizationInfo.input_zp().getValue().getSExtValue()); kZp = rewriter.getI32IntegerAttr( quantizationInfo.weight_zp().getValue().getSExtValue()); } if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() || !biasTy.hasStaticShape() || !resultTy.hasStaticShape()) return rewriter.notifyMatchFailure(op, "tosa.conv ops require static shapes"); auto weightShape = weightTy.getShape(); auto resultShape = resultTy.getShape(); // Apply padding as necessary. Attribute zeroAttr = rewriter.getZeroAttr(inputETy); llvm::SmallVector pad; pad.resize(2, 0); getValuesFromIntArrayAttribute(padAttr, pad); pad.resize(pad.size() + 2, 0); input = applyPad(loc, input, pad, zeroAttr, rewriter); // Broadcast the initial value to the output tensor before convolving. SmallVector indexingMaps; indexingMaps.push_back(AffineMap::get( /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); Value initTensor = rewriter.create( loc, resultTy.getShape(), resultTy.getElementType()); Value biasBroadcast = rewriter .create( loc, resultTy, bias, initTensor, indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()), [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { nestedBuilder.create(nestedLoc, args[0]); }) .getResult(0); // Extract the attributes for convolution. llvm::SmallVector stride, dilation; getValuesFromIntArrayAttribute(strideTosaAttr, stride); getValuesFromIntArrayAttribute(dilationTosaAttr, dilation); // Create the convolution op. 
auto strideAttr = DenseIntElementsAttr::get( RankedTensorType::get({2}, rewriter.getI64Type()), stride); auto dilationAttr = DenseIntElementsAttr::get( RankedTensorType::get({2}, rewriter.getI64Type()), dilation); if (isa(op) && !isQuantized) { rewriter.replaceOpWithNewOp( op, resultTy, ValueRange{input, weight}, ValueRange{biasBroadcast}, strideAttr, dilationAttr); return success(); } if (isa(op) && isQuantized) { auto iZpVal = rewriter.create(loc, iZp); auto kZpVal = rewriter.create(loc, kZp); rewriter.replaceOpWithNewOp( op, resultTy, ValueRange{input, weight, iZpVal, kZpVal}, ValueRange{biasBroadcast}, strideAttr, dilationAttr); return success(); } if (isa(op) && !isQuantized) { ShapedType linalgConvTy = RankedTensorType::get({resultShape[0], resultShape[1], resultShape[2], weightShape[2], weightShape[3]}, resultETy); Value biasReshape = rewriter.create(loc, linalgConvTy, biasBroadcast); Value conv = rewriter .create( loc, linalgConvTy, ValueRange{input, weight}, ValueRange{biasReshape}, dilationAttr, strideAttr) .getResult(0); Value reshape = rewriter.create(loc, resultTy, conv); rewriter.replaceOp(op, reshape); return success(); } return failure(); } namespace { template class PointwiseConverter : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(SrcOp op, PatternRewriter &rewriter) const final { return elementwiseMatchAndRewriteHelper(op, rewriter); } }; template class ConvConverter : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(T op, ArrayRef args, ConversionPatternRewriter &rewriter) const final { return convolutionMatchAndRewriterHelper(op, rewriter); } }; class TransposeConvConverter : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(tosa::TransposeConv2DOp op, ArrayRef args, ConversionPatternRewriter &rewriter) const final { Location loc = op->getLoc(); Value input = op->getOperand(0); Value weight = op->getOperand(1); Value bias = op->getOperand(2); ShapedType inputTy = input.getType().cast(); ShapedType weightTy = weight.getType().cast(); ShapedType biasTy = bias.getType().cast(); ShapedType resultTy = op->getResult(0).getType().cast(); llvm::SmallVector pad; llvm::SmallVector stride; llvm::SmallVector dilation; getValuesFromIntArrayAttribute(op.out_pad().cast(), pad); getValuesFromIntArrayAttribute(op.stride().cast(), stride); getValuesFromIntArrayAttribute(op.dilation().cast(), dilation); // We have not solved for stride / dilation yet. Dilation should be // straight forward but stride is more complicated. Linalg work is likely // required for efficient implementation. 
if (llvm::any_of(stride, [](int64_t v) { return v != 1; })) return failure(); if (llvm::any_of(dilation, [](int64_t v) { return v != 1; })) return failure(); if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() || !biasTy.hasStaticShape() || !resultTy.hasStaticShape()) return failure(); int64_t inputHeight = inputTy.getDimSize(1); int64_t inputWidth = inputTy.getDimSize(2); int64_t kernelHeight = weightTy.getDimSize(1); int64_t kernelWidth = weightTy.getDimSize(2); int64_t outputHeight = resultTy.getDimSize(1); int64_t outputWidth = resultTy.getDimSize(2); int64_t requiredInputHeight = outputHeight + kernelHeight - 1; int64_t requiredInputWidth = outputWidth + kernelWidth - 1; llvm::SmallVector newPad(4, 0); newPad[0] = kernelHeight - 1 - pad[0]; newPad[2] = kernelWidth - 1 - pad[1]; newPad[1] = requiredInputHeight - newPad[0] - inputHeight; newPad[3] = requiredInputWidth - newPad[2] - inputWidth; auto reverse1 = rewriter.create( loc, weightTy, weight, rewriter.getI64IntegerAttr(1)); auto reverse2 = rewriter.create( loc, weightTy, reverse1, rewriter.getI64IntegerAttr(2)); Value conv2d; if (op.quantization_info().hasValue()) { conv2d = rewriter.create( loc, resultTy, input, reverse2, bias, rewriter.getI64ArrayAttr(newPad), rewriter.getI64ArrayAttr(stride), rewriter.getI64ArrayAttr(dilation), op.quantization_info().getValue()); } else { conv2d = rewriter.create( loc, resultTy, input, reverse2, bias, rewriter.getI64ArrayAttr(newPad), rewriter.getI64ArrayAttr(stride), rewriter.getI64ArrayAttr(dilation)); } rewriter.replaceOp(op, conv2d); return success(); } }; class MatMulConverter : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(tosa::MatMulOp op, ArrayRef args, ConversionPatternRewriter &rewriter) const final { tosa::MatMulOp::Adaptor adaptor(args); Location loc = op.getLoc(); auto outputTy = op.getType().cast(); auto outputElementTy = outputTy.getElementType(); auto zeroAttr = rewriter.getZeroAttr(outputElementTy); Value zero = rewriter.create(loc, zeroAttr); auto initTensor = rewriter.create( loc, outputTy.getShape(), outputTy.getElementType()); Value zeroTensor = rewriter.create(loc, zero, initTensor).getResult(0); if (!op.quantization_info()) { rewriter.replaceOpWithNewOp( op, TypeRange{op.getType()}, ValueRange{adaptor.a(), adaptor.b()}, ValueRange{zeroTensor}); return success(); } auto quantizationInfo = op.quantization_info().getValue(); auto aZp = rewriter.create( loc, rewriter.getI32IntegerAttr( quantizationInfo.a_zp().getValue().getSExtValue())); auto bZp = rewriter.create( loc, rewriter.getI32IntegerAttr( quantizationInfo.b_zp().getValue().getSExtValue())); rewriter.replaceOpWithNewOp( op, TypeRange{op.getType()}, ValueRange{adaptor.a(), adaptor.b(), aZp, bZp}, zeroTensor); return success(); } }; class FullyConnectedConverter : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(tosa::FullyConnectedOp op, ArrayRef args, ConversionPatternRewriter &rewriter) const final { Location loc = op.getLoc(); auto outputTy = op.getType().cast(); auto input = op.input(); auto weight = op.weight(); auto bias = op.bias(); auto weightTy = weight.getType().cast(); auto weightShape = weightTy.getShape(); // Creating maps for the output of MatMul and the bias SmallVector indexingMaps; // Broadcast the bias. 
indexingMaps.push_back(AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0, {rewriter.getAffineDimExpr(1)}, rewriter.getContext())); indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank())); auto initTensor = rewriter .create(loc, outputTy.getShape(), outputTy.getElementType()) ->getResults(); auto linalgOp = rewriter .create( loc, outputTy, bias, initTensor, indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()), [&](OpBuilder &nested_builder, Location nested_loc, ValueRange args) { nested_builder.create(loc, *args.begin()); }) ->getResults(); SmallVector permutation{1, 0}; auto permutationAttr = DenseIntElementsAttr::get( RankedTensorType::get({2}, rewriter.getI64Type()), permutation); Value permutationValue = rewriter.create(loc, permutationAttr); SmallVector newWeightShape{weightShape[1], weightShape[0]}; Type newWeightTy = RankedTensorType::get(newWeightShape, weightTy.getElementType()); Value transposedWeight = rewriter.create( loc, newWeightTy, weight, permutationValue); if (!op.quantization_info()) { rewriter.replaceOpWithNewOp( op, TypeRange{op.getType()}, ValueRange{input, transposedWeight}, linalgOp); return success(); } auto quantizationInfo = op.quantization_info().getValue(); auto inputZp = rewriter.create( loc, rewriter.getI32IntegerAttr( quantizationInfo.input_zp().getValue().getSExtValue())); auto outputZp = rewriter.create( loc, rewriter.getI32IntegerAttr( quantizationInfo.weight_zp().getValue().getSExtValue())); rewriter.replaceOpWithNewOp( op, TypeRange{op.getType()}, ValueRange{input, transposedWeight, inputZp, outputZp}, linalgOp); return success(); } }; class ReshapeConverter : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(tosa::ReshapeOp reshape, ArrayRef args, ConversionPatternRewriter &rewriter) const final { typename tosa::ReshapeOp::Adaptor operands(args); ShapedType operandTy = operands.input1().getType().cast(); ShapedType resultTy = reshape.getType().template cast(); if (operandTy == resultTy) { rewriter.replaceOp(reshape, args[0]); return success(); } if (!operandTy.hasStaticShape() || !resultTy.hasStaticShape()) return failure(); // Compute the reassociation maps for the linalg operation. ArrayRef expandedShape = (operandTy.getRank() > resultTy.getRank() ? operandTy.getShape() : resultTy.getShape()); ArrayRef collapsedShape = (operandTy.getRank() > resultTy.getRank() ? resultTy.getShape() : operandTy.getShape()); unsigned currSrcDim = 0, currDstDim = 0; SmallVector reassociationMap(collapsedShape.size()); // First scan all dimensions in the source shapes to see whether we have a // perfect case where consecutive dimensions in source are collapsed. For // such case we can just generate one single linalg.reshape. bool isCollapsingSource = true; while (currSrcDim < expandedShape.size() && currDstDim < collapsedShape.size()) { int64_t dstSize = collapsedShape[currDstDim]; int64_t srcSize = expandedShape[currSrcDim]; while (srcSize < dstSize && currSrcDim < expandedShape.size()) { reassociationMap[currDstDim].push_back( rewriter.getAffineDimExpr(currSrcDim++)); srcSize *= expandedShape[currSrcDim]; } if (srcSize == dstSize) { reassociationMap[currDstDim].push_back( rewriter.getAffineDimExpr(currSrcDim++)); // If the next dim in collapsedShape is not 1, treat subsequent dims in // expandedShape which are 1 to be collapsed. 
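        // Illustrative example: collapsing tensor<2x3x1xf32> into
        // tensor<6xf32> groups all three source dims into the single result
        // dim, i.e. the reassociation map is [[d0, d1, d2]], with the trailing
        // unit dim folded in by the loop below.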
if (currDstDim == collapsedShape.size() - 1 || collapsedShape[currDstDim + 1] != 1) { while (currSrcDim < expandedShape.size() && expandedShape[currSrcDim] == 1) { reassociationMap[currDstDim].push_back( rewriter.getAffineDimExpr(currSrcDim++)); } } } else { isCollapsingSource = false; break; } currDstDim++; } // Check if any remaining dimensions exist. If either is rank-0 we only // require the directly lowering. if (currSrcDim != expandedShape.size() || currDstDim != collapsedShape.size()) isCollapsingSource = collapsedShape.empty() || expandedShape.empty(); // Otherwise, we need to first reduce all source dimensions into one and // then expand to the destination dimensions. if (!isCollapsingSource) { auto getIdentityExprs = [&rewriter](int n) { SmallVector exprs; for (int i = 0; i < n; ++i) exprs.push_back(rewriter.getAffineDimExpr(i)); return exprs; }; Location loc = reshape.getLoc(); int64_t totalElems = std::accumulate(expandedShape.begin(), expandedShape.end(), 1, std::multiplies()); auto elemTy = operandTy.getElementType(); SmallVector collapsingMap = { // Use operandTy here because we need to collapse all operands // dimensions. getIdentityExprs(operandTy.getShape().size())}; SmallVector expandingMap = { // Use resultTy here because we need to expand to all result // dimensions. getIdentityExprs(resultTy.getShape().size())}; auto collapsedTy = RankedTensorType::get({totalElems}, elemTy); Value collapsedOp = rewriter.create( loc, collapsedTy, args[0], collapsingMap); rewriter.replaceOpWithNewOp( reshape, resultTy, collapsedOp, expandingMap); return success(); } if (resultTy.getRank() < args[0].getType().cast().getRank()) rewriter.replaceOpWithNewOp( reshape, resultTy, args[0], reassociationMap); else rewriter.replaceOpWithNewOp( reshape, resultTy, args[0], reassociationMap); return success(); } }; class TransposeConverter : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(tosa::TransposeOp op, PatternRewriter &rewriter) const final { DenseIntElementsAttr perms; if (!matchPattern(op.perms(), m_Constant(&perms))) { return failure(); } auto resultTy = op.getType().cast(); if (!resultTy.hasStaticShape()) return failure(); SmallVector inputExprs; inputExprs.resize(resultTy.getRank()); for (auto permutation : llvm::enumerate(perms.getIntValues())) { inputExprs[permutation.value().getZExtValue()] = rewriter.getAffineDimExpr(permutation.index()); } auto initTensor = rewriter.create( op.getLoc(), ArrayRef({}), resultTy.getShape(), resultTy.getElementType()); SmallVector affineMaps = { AffineMap::get(resultTy.getRank(), /*symbolCount=*/0, inputExprs, rewriter.getContext()), rewriter.getMultiDimIdentityMap(resultTy.getRank())}; rewriter.replaceOpWithNewOp( op, resultTy, op.input1(), ValueRange{initTensor}, affineMaps, getNParallelLoopsAttrs(resultTy.getRank()), [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { nestedBuilder.create(op.getLoc(), *args.begin()); }); return success(); } }; class RescaleConverter : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(tosa::RescaleOp op, PatternRewriter &rewriter) const final { auto loc = op.getLoc(); auto input = op.input(); auto inputTy = op.input().getType().cast(); auto outputTy = op.output().getType().cast(); unsigned rank = inputTy.getRank(); // This is an illegal configuration. 
terminate and log an error if (op.double_round() && !op.scale32()) return rewriter.notifyMatchFailure( op, "tosa.rescale requires scale32 for double_round to be true"); if (!outputTy.hasStaticShape()) return rewriter.notifyMatchFailure( op, "tosa to linalg conversion expects statically shaped tensors"); // The shift and multiplier values. SmallVector multiplierValues; getValuesFromIntArrayAttribute(op.multiplier(), multiplierValues); SmallVector shiftValues; getValuesFromIntArrayAttribute(op.shift(), shiftValues); // Double round only occurs if shift is greater than 31, check that this // is ever true. bool doubleRound = op.double_round() && llvm::any_of(shiftValues, [](int32_t v) { return v > 31; }); SmallVector indexingMaps = { rewriter.getMultiDimIdentityMap(rank)}; SmallVector genericInputs = {input}; // If we are rescaling per-channel then we need to store the multiplier // values in a buffer. Value multiplierConstant; int64_t multiplierArg = 0; if (multiplierValues.size() == 1) { multiplierConstant = rewriter.create( loc, rewriter.getI32IntegerAttr(multiplierValues.front())); } else { SmallVector multiplierExprs{ rewriter.getAffineDimExpr(rank - 1)}; auto multiplierType = RankedTensorType::get({static_cast(multiplierValues.size())}, rewriter.getI32Type()); genericInputs.push_back(rewriter.create( loc, DenseIntElementsAttr::get(multiplierType, multiplierValues))); indexingMaps.push_back(AffineMap::get(/*dimCount=*/rank, /*symbolCount=*/0, multiplierExprs, rewriter.getContext())); multiplierArg = indexingMaps.size() - 1; } // If we are rescaling per-channel then we need to store the shift // values in a buffer. Value shiftConstant; int64_t shiftArg = 0; if (shiftValues.size() == 1) { shiftConstant = rewriter.create( loc, rewriter.getI8IntegerAttr(shiftValues.front())); } else { SmallVector shiftExprs = { rewriter.getAffineDimExpr(rank - 1)}; auto shiftType = RankedTensorType::get({static_cast(shiftValues.size())}, rewriter.getIntegerType(8)); genericInputs.push_back(rewriter.create( loc, DenseIntElementsAttr::get(shiftType, shiftValues))); indexingMaps.push_back(AffineMap::get(/*dimCount=*/rank, /*symbolCount=*/0, shiftExprs, rewriter.getContext())); shiftArg = indexingMaps.size() - 1; } // Indexing maps for output values. indexingMaps.push_back(rewriter.getMultiDimIdentityMap(rank)); // Construct the indexing maps needed for linalg.generic ops. Value initTensor = rewriter.create( loc, ArrayRef({}), outputTy.getShape(), outputTy.getElementType()); auto linalgOp = rewriter.create( loc, outputTy, genericInputs, ValueRange{initTensor}, indexingMaps, getNParallelLoopsAttrs(rank), [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange blockArgs) { Value value = blockArgs[0]; // For now we do all of our math in 64-bit. This is not optimal but // should be correct for now, consider computing correct bit depth // later. int32_t inBitwidth = value.getType().getIntOrFloatBitWidth() > 32 ? 48 : 32; auto inputZp = createConstFromIntAttribute( op, "input_zp", nestedBuilder.getIntegerType(inBitwidth), nestedBuilder); auto outputZp = createConstFromIntAttribute( op, "output_zp", nestedBuilder.getI32Type(), nestedBuilder); Value multiplier = multiplierConstant ? multiplierConstant : blockArgs[multiplierArg]; Value shift = shiftConstant ? 
shiftConstant : blockArgs[shiftArg]; if (value.getType().getIntOrFloatBitWidth() < 32) { value = nestedBuilder.create( nestedLoc, nestedBuilder.getI32Type(), value); } value = nestedBuilder.create(nestedLoc, value, inputZp); value = nestedBuilder.create( loc, nestedBuilder.getI32Type(), value, multiplier, shift, nestedBuilder.getBoolAttr(doubleRound)); // Move to the new zero-point. value = nestedBuilder.create(nestedLoc, value, outputZp); // Saturate to the output size. IntegerType outIntType = blockArgs.back().getType().cast(); unsigned outBitWidth = outIntType.getWidth(); auto intMin = nestedBuilder.create( loc, nestedBuilder.getIntegerAttr( nestedBuilder.getI32Type(), APInt::getSignedMinValue(outBitWidth).getSExtValue())); auto intMax = nestedBuilder.create( loc, nestedBuilder.getIntegerAttr( nestedBuilder.getI32Type(), APInt::getSignedMaxValue(outBitWidth).getSExtValue())); value = clampHelper(nestedLoc, value, intMin, intMax, CmpIPredicate::slt, nestedBuilder); if (outIntType.getWidth() < 32) { value = nestedBuilder.create(nestedLoc, outIntType, value); } nestedBuilder.create(loc, value); }); rewriter.replaceOp(op, linalgOp->getResults()); return success(); } }; class ResizeConverter : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(tosa::ResizeOp op, PatternRewriter &rewriter) const final { Location loc = op.getLoc(); auto input = op.input(); auto inputTy = input.getType().cast(); auto resultTy = op.getType().cast(); auto resultElementTy = resultTy.getElementType(); auto imageH = inputTy.getShape()[1]; auto imageW = inputTy.getShape()[2]; if (!resultTy.hasStaticShape()) return failure(); if (op.mode() != "NEAREST_NEIGHBOR" && op.mode() != "BILINEAR") return failure(); auto initTensor = rewriter .create(loc, ArrayRef{}, resultTy.getShape(), resultElementTy) .result(); SmallVector affineMaps = { rewriter.getMultiDimIdentityMap(resultTy.getRank())}; auto genericOp = rewriter.create( loc, resultTy, ValueRange({}), ValueRange{initTensor}, affineMaps, getNParallelLoopsAttrs(resultTy.getRank())); rewriter.replaceOp(op, genericOp.getResult(0)); { OpBuilder::InsertionGuard regionGuard(rewriter); rewriter.createBlock(&genericOp.region(), genericOp.region().end(), TypeRange({resultElementTy})); Value batch = rewriter.create(loc, 0); Value y = rewriter.create(loc, 1); Value x = rewriter.create(loc, 2); Value channel = rewriter.create(loc, 3); auto hwMin = rewriter.create(loc, rewriter.getI32IntegerAttr(0)); auto hMax = rewriter.create( loc, rewriter.getI32IntegerAttr(imageH - 1)); auto wMax = rewriter.create( loc, rewriter.getI32IntegerAttr(imageW - 1)); Value inY = rewriter.create(loc, rewriter.getI32Type(), y); Value inX = rewriter.create(loc, rewriter.getI32Type(), x); int32_t shift = op.shift(); bool floatingPointMode = shift == 0; Value yStride, xStride, yOffset, xOffset; if (floatingPointMode) { yStride = rewriter.create(loc, op.stride_fp()[0]); xStride = rewriter.create(loc, op.stride_fp()[1]); yOffset = rewriter.create(loc, op.offset_fp()[0]); xOffset = rewriter.create(loc, op.offset_fp()[1]); } else { SmallVector stride, offset; getValuesFromIntArrayAttribute(op.stride(), stride); getValuesFromIntArrayAttribute(op.offset(), offset); yStride = rewriter.create( loc, rewriter.getI32IntegerAttr(stride[0])); xStride = rewriter.create( loc, rewriter.getI32IntegerAttr(stride[1])); yOffset = rewriter.create( loc, rewriter.getI32IntegerAttr(offset[0])); xOffset = rewriter.create( loc, rewriter.getI32IntegerAttr(offset[1])); } // Compute 
the the integer index and partial offset. // x = x * stride + offset; // ix = floor(x) // dx = x - ix Value ix, iy, dx, dy; if (floatingPointMode) { Value y = rewriter.create(loc, rewriter.getF32Type(), inY); Value x = rewriter.create(loc, rewriter.getF32Type(), inX); y = rewriter.create(loc, y, yStride); x = rewriter.create(loc, x, xStride); y = rewriter.create(loc, y, yOffset); x = rewriter.create(loc, x, xOffset); iy = rewriter.create(loc, y); ix = rewriter.create(loc, x); dy = rewriter.create(loc, y, iy); dx = rewriter.create(loc, x, ix); iy = rewriter.create(loc, rewriter.getI32Type(), iy); ix = rewriter.create(loc, rewriter.getI32Type(), ix); } else { Value shiftVal = rewriter.create(loc, rewriter.getI32IntegerAttr(shift)); Value y = rewriter.create(loc, inY, yStride); Value x = rewriter.create(loc, inX, xStride); y = rewriter.create(loc, y, yOffset); x = rewriter.create(loc, x, xOffset); iy = rewriter.create(loc, y, shiftVal); ix = rewriter.create(loc, x, shiftVal); Value yTrunc = rewriter.create(loc, iy, shiftVal); Value xTrunc = rewriter.create(loc, ix, shiftVal); dy = rewriter.create(loc, y, yTrunc); dx = rewriter.create(loc, x, xTrunc); } if (op.mode() == "NEAREST_NEIGHBOR") { Value yPred, xPred; // Round the index position towards the closest pixel location. if (floatingPointMode) { auto halfVal = rewriter.create(loc, rewriter.getF32FloatAttr(0.5f)); yPred = rewriter.create(loc, CmpFPredicate::OGE, dy, halfVal); xPred = rewriter.create(loc, CmpFPredicate::OGE, dx, halfVal); } else { auto halfVal = rewriter.create( loc, rewriter.getI32IntegerAttr(1 << (shift - 1))); yPred = rewriter.create(loc, CmpIPredicate::sge, dy, halfVal); xPred = rewriter.create(loc, CmpIPredicate::sge, dx, halfVal); } auto zeroVal = rewriter.create(loc, rewriter.getI32IntegerAttr(0)); auto oneVal = rewriter.create(loc, rewriter.getI32IntegerAttr(1)); auto yOffset = rewriter.create(loc, yPred, oneVal, zeroVal); auto xOffset = rewriter.create(loc, xPred, oneVal, zeroVal); iy = rewriter.create(loc, iy, yOffset); ix = rewriter.create(loc, ix, xOffset); // Clamp the to be within the bounds of the input image. iy = clampHelper(loc, iy, hwMin, hMax, CmpIPredicate::slt, rewriter); ix = clampHelper(loc, ix, hwMin, wMax, CmpIPredicate::slt, rewriter); // Read the value from the input array. 
iy = rewriter.create(loc, rewriter.getIndexType(), iy); ix = rewriter.create(loc, rewriter.getIndexType(), ix); Value result = rewriter.create( loc, input, ValueRange{batch, iy, ix, channel}); rewriter.create(loc, result); return success(); } if (op.mode() == "BILINEAR") { Value y0 = iy; Value x0 = ix; auto oneVal = rewriter.create(loc, rewriter.getI32IntegerAttr(1)); Value y1 = rewriter.create(loc, y0, oneVal); Value x1 = rewriter.create(loc, x0, oneVal); y0 = clampHelper(loc, y0, hwMin, hMax, CmpIPredicate::slt, rewriter); y1 = clampHelper(loc, y1, hwMin, hMax, CmpIPredicate::slt, rewriter); x0 = clampHelper(loc, x0, hwMin, wMax, CmpIPredicate::slt, rewriter); x1 = clampHelper(loc, x1, hwMin, wMax, CmpIPredicate::slt, rewriter); y0 = rewriter.create(loc, rewriter.getIndexType(), y0); y1 = rewriter.create(loc, rewriter.getIndexType(), y1); x0 = rewriter.create(loc, rewriter.getIndexType(), x0); x1 = rewriter.create(loc, rewriter.getIndexType(), x1); Value y0x0 = rewriter.create( loc, input, ValueRange{batch, y0, x0, channel}); Value y0x1 = rewriter.create( loc, input, ValueRange{batch, y0, x1, channel}); Value y1x0 = rewriter.create( loc, input, ValueRange{batch, y1, x0, channel}); Value y1x1 = rewriter.create( loc, input, ValueRange{batch, y1, x1, channel}); if (floatingPointMode) { auto oneVal = rewriter.create(loc, rewriter.getF32FloatAttr(1.f)); Value rightPart = dx; Value leftPart = rewriter.create(loc, oneVal, dx); y0x0 = rewriter.create(loc, y0x0, leftPart); y0x1 = rewriter.create(loc, y0x1, rightPart); Value topAcc = rewriter.create(loc, y0x0, y0x1); y1x0 = rewriter.create(loc, y1x0, leftPart); y1x1 = rewriter.create(loc, y1x1, rightPart); Value bottomAcc = rewriter.create(loc, y1x0, y1x1); Value bottomPart = dy; Value topPart = rewriter.create(loc, oneVal, dy); topAcc = rewriter.create(loc, topAcc, topPart); bottomAcc = rewriter.create(loc, bottomAcc, bottomPart); Value result = rewriter.create(loc, topAcc, bottomAcc); rewriter.create(loc, result); return success(); } else { y0x0 = rewriter.create(loc, resultElementTy, y0x0); y0x1 = rewriter.create(loc, resultElementTy, y0x1); y1x0 = rewriter.create(loc, resultElementTy, y1x0); y1x1 = rewriter.create(loc, resultElementTy, y1x1); if (resultElementTy.getIntOrFloatBitWidth() > 32) { dx = rewriter.create(loc, resultElementTy, dx); dy = rewriter.create(loc, resultElementTy, dy); } auto unitVal = rewriter.create( loc, rewriter.getIntegerAttr(resultElementTy, 1 << shift)); Value rightPart = dx; Value leftPart = rewriter.create(loc, unitVal, dx); y0x0 = rewriter.create(loc, y0x0, leftPart); y0x1 = rewriter.create(loc, y0x1, rightPart); Value topAcc = rewriter.create(loc, y0x0, y0x1); y1x0 = rewriter.create(loc, y1x0, leftPart); y1x1 = rewriter.create(loc, y1x1, rightPart); Value bottomAcc = rewriter.create(loc, y1x0, y1x1); Value bottomPart = dy; Value topPart = rewriter.create(loc, unitVal, dy); topAcc = rewriter.create(loc, topAcc, topPart); bottomAcc = rewriter.create(loc, bottomAcc, bottomPart); Value result = rewriter.create(loc, topAcc, bottomAcc); rewriter.create(loc, result); return success(); } } return failure(); } return success(); } }; // At the codegen level any identity operations should be removed. Any cases // where identity is load-bearing (e.g. cross device computation) should be // handled before lowering to codegen. 
template class IdentityNConverter : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(SrcOp op, PatternRewriter &rewriter) const final { rewriter.replaceOp(op, op.getOperation()->getOperands()); return success(); } }; template class ReduceConverter : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(SrcOp reduceOp, PatternRewriter &rewriter) const final { return reduceMatchAndRewriteHelper(reduceOp, reduceOp.axis(), rewriter); } }; struct ConcatConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(tosa::ConcatOp op, ArrayRef args, ConversionPatternRewriter &rewriter) const override { auto resultType = op.getType().dyn_cast(); if (!resultType || !resultType.hasStaticShape()) { return rewriter.notifyMatchFailure(op, "expected static shaped tensor type"); } Location loc = op.getLoc(); int axis = op.axis(); Value axisValue = rewriter.create(loc, rewriter.getIndexAttr(axis)); int rank = resultType.getRank(); SmallVector offsets, sizes, strides; sizes.reserve(rank); strides.resize(rank, rewriter.create(loc, 1)); offsets.resize(rank, rewriter.create(loc, 0)); for (int i = 0; i < rank; ++i) { sizes.push_back(rewriter.create(loc, args[0], i)); } Value resultDimSize = sizes[axis]; for (auto arg : args.drop_front()) { auto size = rewriter.create(loc, arg, axisValue); resultDimSize = rewriter.create(loc, resultDimSize, size); } sizes[axis] = resultDimSize; Value init = rewriter.create( loc, resultType.getShape(), resultType.getElementType()); Value zeroVal = rewriter.create( loc, rewriter.getZeroAttr(resultType.getElementType())); Value result = rewriter.create(loc, zeroVal, init).getResult(0); for (auto arg : args) { sizes[axis] = rewriter.create(loc, arg, axisValue); result = rewriter.create(loc, arg, result, offsets, sizes, strides); offsets[axis] = rewriter.create(loc, offsets[axis], sizes[axis]); } rewriter.replaceOp(op, result); return success(); } }; class ReverseConverter : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(tosa::ReverseOp op, PatternRewriter &rewriter) const final { auto loc = op.getLoc(); Value input = op.input(); auto inputTy = input.getType().template cast(); auto resultTy = op.getType().template cast(); auto rank = resultTy.getRank(); auto axis = op.axis(); if (!inputTy.hasStaticShape()) return rewriter.notifyMatchFailure( op, "No initial value found for reduction operation"); // First fill the output buffer with the init value. auto initTensor = rewriter .create( loc, ArrayRef({}), inputTy.getShape(), inputTy.getElementType()) .result(); SmallVector inputExprs; inputExprs.resize(resultTy.getRank()); for (int i = 0; i < rank; i++) inputExprs[i] = rewriter.getAffineDimExpr(i); inputExprs[axis] = rewriter.getAffineConstantExpr(inputTy.getDimSize(axis) - 1) - inputExprs[axis]; SmallVector affineMaps = { AffineMap::get(resultTy.getRank(), /*symbolCount=*/0, inputExprs, rewriter.getContext()), rewriter.getMultiDimIdentityMap(resultTy.getRank())}; rewriter.replaceOpWithNewOp( op, resultTy, op.input(), ValueRange{initTensor}, affineMaps, getNParallelLoopsAttrs(resultTy.getRank()), [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { nestedBuilder.create(op.getLoc(), *args.begin()); }); return success(); } }; // This converter translate a tile operation to a reshape, broadcast, reshape. 
// The first reshape minimally expands each tiled dimension to include a // proceding size-1 dim. This dim is then broadcasted to the appropriate // multiple. struct TileConverter : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(tosa::TileOp op, ArrayRef args, ConversionPatternRewriter &rewriter) const override { auto loc = op.getLoc(); auto input = op.input1(); auto inputTy = input.getType().cast(); auto inputShape = inputTy.getShape(); auto resultTy = op.getType().cast(); auto elementTy = inputTy.getElementType(); int64_t rank = inputTy.getRank(); if (!inputTy.hasStaticShape() || !resultTy.hasStaticShape()) return failure(); SmallVector multiples; getValuesFromIntArrayAttribute(op.multiples(), multiples); // Broadcast the newly added dimensions to their appropriate multiple. SmallVector genericShape; for (int i = 0; i < rank; i++) { genericShape.push_back(multiples[i]); genericShape.push_back(inputShape[i]); } auto initTensor = rewriter.create( op.getLoc(), ArrayRef({}), genericShape, elementTy); // We needs to map the input shape to the non-broadcasted dimensions. SmallVector dimExprs; dimExprs.reserve(rank); for (unsigned i = 0; i < rank; ++i) dimExprs.push_back(rewriter.getAffineDimExpr(i * 2 + 1)); auto readAffineMap = AffineMap::get(/*dimCount=*/rank * 2, /*symbolCount=*/0, dimExprs, rewriter.getContext()); SmallVector affineMaps = { readAffineMap, rewriter.getMultiDimIdentityMap(genericShape.size())}; auto genericOp = rewriter.create( loc, RankedTensorType::get(genericShape, elementTy), input, ValueRange{initTensor}, affineMaps, getNParallelLoopsAttrs(genericShape.size()), [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { nestedBuilder.create(op.getLoc(), *args.begin()); }); rewriter.replaceOpWithNewOp( op, resultTy, genericOp.getResult(0), rewriter.getI64ArrayAttr(resultTy.getShape())); return success(); } }; class PadConverter : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(tosa::PadOp padOp, PatternRewriter &rewriter) const final { auto loc = padOp.getLoc(); auto input = padOp.input1(); auto padding = padOp.padding(); ShapedType inputTy = input.getType().cast(); ShapedType paddingTy = padding.getType().cast(); Type elementTy = inputTy.getElementType(); int64_t rank = inputTy.getRank(); if (!inputTy.hasStaticShape() || !paddingTy.hasStaticShape()) { return rewriter.notifyMatchFailure( padOp, "Pad converter requires static shaped input / padding values."); } Attribute constantAttr; if (elementTy.isa()) constantAttr = rewriter.getFloatAttr(elementTy, 0.0); else if (elementTy.isa() && !padOp.quantization_info()) constantAttr = rewriter.getIntegerAttr(elementTy, 0); else if (elementTy.isa() && padOp.quantization_info()) { auto value = padOp.quantization_info().getValue().input_zp().getValue(); constantAttr = rewriter.getIntegerAttr(elementTy, value.getZExtValue()); } if (!constantAttr) { return rewriter.notifyMatchFailure( padOp, "tosa.pad to linalg lowering encountered an unknown element type"); } Value lowIndex = rewriter.create(loc, rewriter.getIndexAttr(0)); Value highIndex = rewriter.create(loc, rewriter.getIndexAttr(1)); SmallVector lowValues; SmallVector highValues; lowValues.reserve(rank); highValues.reserve(rank); for (int i = 0; i < rank; i++) { Value inputIndex = rewriter.createOrFold(loc, i); Value lowVal = rewriter.createOrFold( loc, padding, ValueRange({inputIndex, lowIndex})); Value highVal = rewriter.createOrFold( loc, padding, 
          ValueRange({inputIndex, highIndex}));

      lowVal = rewriter.createOrFold(loc, rewriter.getIndexType(), lowVal);
      highVal = rewriter.createOrFold(loc, rewriter.getIndexType(), highVal);

      lowValues.push_back(lowVal);
      highValues.push_back(highVal);
    }

    Value constant = rewriter.create(loc, constantAttr);

    auto newPadOp = linalg::PadTensorOp::createPadScalarOp(
        padOp.getType(), input, constant, lowValues, highValues, loc, rewriter);

    rewriter.replaceOp(padOp, newPadOp.getResult());
    return success();
  }
};

// Tosa argmax lowering represents the ArgMax op as a linalg.indexed_generic
// op, producing two output buffers.
//
// The first output buffer contains the index of the found maximum value. It is
// initialized to 0 and has the resulting integer type.
//
// The second output buffer contains the maximum value found. It is initialized
// to the minimum representable value of the input element type. After being
// populated by indexed_generic, this buffer is discarded as only the index is
// requested.
//
// The indexed_generic op updates both the maximum value and index if the
// current value exceeds the running max.
class ArgMaxConverter : public OpRewritePattern<tosa::ArgMaxOp> {
public:
  using OpRewritePattern<tosa::ArgMaxOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(tosa::ArgMaxOp argmaxOp,
                                PatternRewriter &rewriter) const final {
    auto loc = argmaxOp.getLoc();
    Value input = argmaxOp.input();
    auto inputTy = input.getType().cast();
    auto resultTy = argmaxOp.output().getType().cast();
    auto inElementTy = inputTy.getElementType();
    auto outElementTy = resultTy.getElementType();
    int axis = argmaxOp.axis();
    auto resultMaxTy = RankedTensorType::get(resultTy.getShape(), inElementTy);

    if (!inputTy.hasStaticShape())
      return rewriter.notifyMatchFailure(
          argmaxOp,
          "tosa.arg_max to linalg.* requires statically shaped input");

    if (!outElementTy.isa())
      return rewriter.notifyMatchFailure(
          argmaxOp,
          "tosa.arg_max to linalg.* requires integer-like result type");

    // First fill the output buffer for the index.
    auto initTensorIdx =
        rewriter
            .create(loc, ArrayRef({}), resultTy.getShape(), outElementTy)
            .result();
    auto fillValueIdx = rewriter.create(
        loc, rewriter.getIntegerAttr(outElementTy, 0));
    auto filledTensorIdx =
        rewriter.create(loc, fillValueIdx, initTensorIdx).result();

    // Second fill the output buffer for the running max.
    auto initTensorMax =
        rewriter
            .create(loc, ArrayRef({}), resultTy.getShape(), inElementTy)
            .result();
    auto fillValueMaxAttr =
        createInitialValueForReduceOp(argmaxOp, inElementTy, rewriter);

    if (!fillValueMaxAttr)
      return rewriter.notifyMatchFailure(
          argmaxOp, "unsupported tosa.argmax element type");

    auto fillValueMax = rewriter.create(loc, fillValueMaxAttr);
    auto filledTensorMax =
        rewriter.create(loc, fillValueMax, initTensorMax).result();

    // We need to reduce along the arg-max axis, with parallel operations along
    // the rest.
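    // For a rank-3 input reduced along axis == 1, for instance, the iterator
    // types below become {parallel, reduction, parallel}, and the two output
    // maps drop d1 so that both outputs have the input shape with the reduced
    // axis removed.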
    SmallVector iteratorTypes;
    iteratorTypes.resize(inputTy.getRank(), getParallelIteratorTypeName());
    iteratorTypes[axis] = getReductionIteratorTypeName();

    SmallVector srcExprs;
    SmallVector dstExprs;
    for (int i = 0, rank = inputTy.getRank(); i != rank; ++i) {
      srcExprs.push_back(mlir::getAffineDimExpr(i, rewriter.getContext()));
      if (axis != i)
        dstExprs.push_back(mlir::getAffineDimExpr(i, rewriter.getContext()));
    }

    bool didEncounterError = false;
    auto maps = AffineMap::inferFromExprList({srcExprs, dstExprs, dstExprs});
    auto linalgOp = rewriter.create(
        loc, ArrayRef({resultTy, resultMaxTy}), input,
        ValueRange({filledTensorIdx, filledTensorMax}), maps, iteratorTypes,
        [&](OpBuilder &nestedBuilder, Location nestedLoc,
            ValueRange blockArgs) {
          auto newValue = blockArgs[0];
          auto oldIndex = blockArgs[1];
          auto oldValue = blockArgs[2];

          Value newIndex = rewriter.create(
              nestedLoc, oldIndex.getType(), rewriter.create(loc, axis));

          Value predicate;
          if (inElementTy.isa()) {
            predicate = rewriter.create(nestedLoc, CmpFPredicate::OGT,
                                        newValue, oldValue);
          } else if (inElementTy.isa()) {
            predicate = rewriter.create(nestedLoc, CmpIPredicate::sgt,
                                        newValue, oldValue);
          } else {
            didEncounterError = true;
            return;
          }

          auto resultMax =
              rewriter.create(nestedLoc, predicate, newValue, oldValue);
          auto resultIndex =
              rewriter.create(nestedLoc, predicate, newIndex, oldIndex);
          nestedBuilder.create(nestedLoc,
                               ValueRange({resultIndex, resultMax}));
        });

    if (didEncounterError)
      return rewriter.notifyMatchFailure(
          argmaxOp, "unsupported tosa.argmax element type");

    rewriter.replaceOp(argmaxOp, linalgOp.getResult(0));
    return success();
  }
};

class GatherConverter : public OpConversionPattern<tosa::GatherOp> {
public:
  using OpConversionPattern<tosa::GatherOp>::OpConversionPattern;

  LogicalResult
  matchAndRewrite(tosa::GatherOp op, ArrayRef<Value> args,
                  ConversionPatternRewriter &rewriter) const final {
    auto input = args[0];
    auto indices = args[1];

    auto inputTy = input.getType().cast();
    auto indicesTy = indices.getType().cast();
    auto resultTy = op.getType().cast();

    if (!inputTy.hasStaticShape() || !indicesTy.hasStaticShape())
      return rewriter.notifyMatchFailure(
          op, "require input type to have static shape");

    auto resultElementTy = resultTy.getElementType();

    auto loc = op.getLoc();

    auto initTensor =
        rewriter
            .create(loc, ArrayRef{}, resultTy.getShape(), resultElementTy)
            .result();

    SmallVector affineMaps = {
        AffineMap::get(
            /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0,
            {rewriter.getAffineDimExpr(0), rewriter.getAffineDimExpr(1)},
            rewriter.getContext()),
        rewriter.getMultiDimIdentityMap(resultTy.getRank())};

    auto genericOp = rewriter.create(
        loc, ArrayRef({resultTy}), ValueRange{indices},
        ValueRange{initTensor}, affineMaps,
        getNParallelLoopsAttrs(resultTy.getRank()),
        [&](OpBuilder &b, Location loc, ValueRange args) {
          auto indexValue = args[0];
          auto index0 = rewriter.create(loc, 0);
          Value index1 =
              rewriter.create(loc, rewriter.getIndexType(), indexValue);
          auto index2 = rewriter.create(loc, 2);
          Value extract = rewriter.create(
              loc, input, ValueRange{index0, index1, index2});
          rewriter.create(loc, extract);
        });
    rewriter.replaceOp(op, genericOp.getResult(0));
    return success();
  }
};

// Lowers the TableOp to a series of gathers and numeric operations. This
// includes interpolation between the high/low values. For the I8 variant, this
// simplifies to a single gather operation.
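// For the I16 variant the input is first shifted into the unsigned range by
// adding 32768; the high bits (value >> 7) select a table entry and the low
// seven bits serve as the interpolation fraction between table[index] and
// table[index + 1].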
class TableConverter : public OpRewritePattern<tosa::TableOp> {
public:
  using OpRewritePattern<tosa::TableOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(tosa::TableOp op,
                                PatternRewriter &rewriter) const final {
    auto loc = op.getLoc();
    Value input = op.input();
    Value table = op.table();
    auto inputTy = input.getType().cast();
    auto tableTy = table.getType().cast();
    auto resultTy = op.getType().cast();

    if (!inputTy.hasStaticShape())
      return rewriter.notifyMatchFailure(
          op, "require input type to have static shape");

    auto inputElementTy = inputTy.getElementType();
    auto tableElementTy = tableTy.getElementType();
    auto resultElementTy = resultTy.getElementType();

    auto initTensor =
        rewriter
            .create(loc, ArrayRef{}, resultTy.getShape(), resultElementTy)
            .result();

    SmallVector affineMaps = {
        rewriter.getMultiDimIdentityMap(resultTy.getRank()),
        rewriter.getMultiDimIdentityMap(resultTy.getRank())};

    auto genericOp = rewriter.create(
        loc, resultTy, ValueRange({input}), ValueRange{initTensor}, affineMaps,
        getNParallelLoopsAttrs(resultTy.getRank()));
    rewriter.replaceOp(op, genericOp.getResult(0));

    {
      OpBuilder::InsertionGuard regionGuard(rewriter);
      Block *block = rewriter.createBlock(
          &genericOp.region(), genericOp.region().end(),
          TypeRange({inputElementTy, resultElementTy}));

      auto inputValue = block->getArgument(0);
      rewriter.setInsertionPointToStart(block);
      if (inputElementTy.isInteger(8) && tableElementTy.isInteger(8) &&
          resultElementTy.isInteger(8)) {
        Value index =
            rewriter.create(loc, rewriter.getIndexType(), inputValue);
        Value extract = rewriter.create(loc, table, ValueRange{index});
        rewriter.create(loc, extract);
        return success();
      }

      if (inputElementTy.isInteger(16) && tableElementTy.isInteger(16) &&
          resultElementTy.isInteger(32)) {
        Value extend =
            rewriter.create(loc, rewriter.getI32Type(), inputValue);

        auto offset =
            rewriter.create(loc, rewriter.getI32IntegerAttr(32768));
        auto seven = rewriter.create(loc, rewriter.getI32IntegerAttr(7));
        auto one = rewriter.create(loc, rewriter.getI32IntegerAttr(1));
        auto b1111111 =
            rewriter.create(loc, rewriter.getI32IntegerAttr(127));

        // Compute the index and fractional part from the input value:
        //   value = value + 32768
        //   index = value >> 7;
        //   fraction = value & 0x7f;
        auto extendAdd = rewriter.create(loc, extend, offset);
        Value index = rewriter.create(loc, extendAdd, seven);
        Value fraction = rewriter.create(loc, extendAdd, b1111111);

        // Extract the base and next values from the table.
        //   base = (int32_t) table[index];
        //   next = (int32_t) table[index + 1];
        Value indexPlusOne = rewriter.create(loc, index, one);

        index = rewriter.create(loc, rewriter.getIndexType(), index);
        indexPlusOne =
            rewriter.create(loc, rewriter.getIndexType(), indexPlusOne);

        Value base = rewriter.create(loc, table, ValueRange{index});
        Value next = rewriter.create(loc, table, ValueRange{indexPlusOne});

        base = rewriter.create(loc, rewriter.getI32Type(), base);
        next = rewriter.create(loc, rewriter.getI32Type(), next);

        // Use the fractional part to interpolate between the input values:
        //   result = (base << 7) + (next - base) * fraction
        Value baseScaled = rewriter.create(loc, base, seven);
        Value diff = rewriter.create(loc, next, base);
        Value diffScaled = rewriter.create(loc, diff, fraction);
        Value result = rewriter.create(loc, baseScaled, diffScaled);

        rewriter.create(loc, result);

        return success();
      }
    }

    return rewriter.notifyMatchFailure(
        op, "unable to create body for tosa.table op");
  }
};

template <typename SrcOp>
class Pool2dConverter : public OpRewritePattern<SrcOp> {
public:
  using OpRewritePattern<SrcOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(SrcOp op,
                                PatternRewriter &rewriter) const final {
    Location loc = op.getLoc();
    Value input = op.input();
    ShapedType inputTy = input.getType().cast();
    Type inElementTy = inputTy.getElementType();

    ShapedType resultTy = op.getType().template cast();
    Type outElementTy = inputTy.getElementType();

    if (!inputTy.hasStaticShape())
      return failure();

    // Determine what the initial value needs to be for the pooling op.
    Attribute initialAttr;
    if (isa(op) && outElementTy.isF32())
      initialAttr = rewriter.getFloatAttr(
          outElementTy,
          APFloat::getLargest(
              outElementTy.cast().getFloatSemantics(), true));

    if (isa(op) && outElementTy.isa())
      initialAttr = rewriter.getIntegerAttr(
          outElementTy,
          APInt::getSignedMinValue(outElementTy.getIntOrFloatBitWidth()));

    if (isa(op) && outElementTy.isa())
      initialAttr = rewriter.getZeroAttr(outElementTy);

    if (!initialAttr)
      return rewriter.notifyMatchFailure(
          op, "Unsupported initial value for tosa.maxpool_2d op");

    // Apply padding as necessary.
    llvm::SmallVector pad;
    pad.resize(2, 0);
    getValuesFromIntArrayAttribute(op.pad(), pad);
    pad.resize(pad.size() + 2, 0);
    Value paddedInput = applyPad(loc, input, pad, initialAttr, rewriter);

    Value initialValue = rewriter.create(loc, initialAttr);

    SmallVector kernel, stride;
    getValuesFromIntArrayAttribute(op.kernel(), kernel);
    getValuesFromIntArrayAttribute(op.stride(), stride);

    Attribute strideAttr = rewriter.getI64VectorAttr(stride);
    Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1});

    // Create the linalg op that performs pooling.
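    // Max pooling maps directly onto a named linalg pooling op. Average
    // pooling is lowered as a windowed sum followed by the linalg.generic
    // built below, which divides each output element by the number of input
    // elements the window actually covered.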
    Value initTensor = rewriter.create(
        loc, resultTy.getShape(), resultTy.getElementType());

    Value filledInitTensor =
        rewriter.create(loc, initialValue, initTensor).result();

    Value fakeWindowDims =
        rewriter.create(loc, kernel, outElementTy);

    if (isa(op)) {
      rewriter.replaceOpWithNewOp(
          op, ArrayRef{resultTy}, ValueRange{paddedInput, fakeWindowDims},
          filledInitTensor, strideAttr, dilationAttr);
      return success();
    }

    if (isa(op) && inElementTy.isF32()) {
      Value poolingOp =
          rewriter
              .create(loc, ArrayRef{resultTy},
                      ValueRange{paddedInput, fakeWindowDims},
                      filledInitTensor, strideAttr, dilationAttr)
              .getResult(0);
      auto poolingOpTy = poolingOp.getType().cast();
      auto affineMap = rewriter.getMultiDimIdentityMap(resultTy.getRank());
      auto genericOp = rewriter.create(
          loc, ArrayRef({resultTy}), ValueRange{}, ValueRange{poolingOp},
          ArrayRef({affineMap}), getNParallelLoopsAttrs(resultTy.getRank()),
          [&](OpBuilder &b, Location loc, ValueRange args) {
            auto zero = rewriter.create(loc, 0);
            auto one = rewriter.create(loc, 1);
            auto iH = rewriter.create(loc, poolingOpTy.getDimSize(1) - 1);
            auto iW = rewriter.create(loc, poolingOpTy.getDimSize(2) - 1);

            // Compute the indices from either end.
            auto y0 = rewriter.create(loc, 1);
            auto x0 = rewriter.create(loc, 2);
            auto y1 = rewriter.create(loc, iH, y0);
            auto x1 = rewriter.create(loc, iW, x0);

            // Determine what portion of the valid input is covered by the
            // kernel.
            auto padFn = [&](Value v, Value x, int64_t pad) -> Value {
              if (pad == 0)
                return v;
              auto padVal = rewriter.create(loc, pad);
              Value dx = rewriter.create(loc, x, padVal);
              Value cmp =
                  rewriter.create(loc, CmpIPredicate::slt, dx, zero);
              Value offset = rewriter.create(loc, cmp, dx, zero);
              return rewriter.create(loc, v, offset)->getResult(0);
            };

            // Compute the vertical component of coverage.
            auto kH0 = rewriter.create(loc, kernel[0]);
            auto kH1 = padFn(kH0, y0, pad[2]);
            auto kH2 = padFn(kH1, y1, pad[3]);
            auto kHCmp =
                rewriter.create(loc, CmpIPredicate::slt, kH2, one);
            auto kH3 = rewriter.create(loc, kHCmp, one, kH2);

            // Compute the horizontal component of coverage.
            auto kW0 = rewriter.create(loc, kernel[1]);
            auto kW1 = padFn(kW0, x0, pad[4]);
            auto kW2 = padFn(kW1, x1, pad[5]);
            auto kWCmp =
                rewriter.create(loc, CmpIPredicate::slt, kW2, one);
            auto kW3 = rewriter.create(loc, kWCmp, one, kW2);

            // Compute the total number of elements and normalize.
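            // For example, with a 3x3 kernel and a padding of 1 on each side,
            // the window anchored at an output corner only overlaps a 2x2
            // region of the original input, so the sum is divided by 4 rather
            // than 9.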
            Value count = rewriter.create(loc, kH3, kW3);
            auto countI = rewriter.create(loc, rewriter.getI32Type(), count);
            auto countF = rewriter.create(loc, inElementTy, countI);

            auto div = rewriter.create(loc, args[0], countF)->getResult(0);

            rewriter.create(loc, div);
          });

      rewriter.replaceOp(op, genericOp.getResult(0));
      return success();
    }

    return failure();
  }
};

} // namespace

void mlir::tosa::populateTosaToLinalgOnTensorsConversionPatterns(
    RewritePatternSet *patterns) {
  patterns->add<
      // clang-format off
      PointwiseConverter, PointwiseConverter, PointwiseConverter,
      PointwiseConverter, PointwiseConverter, PointwiseConverter,
      PointwiseConverter, PointwiseConverter, PointwiseConverter,
      PointwiseConverter, PointwiseConverter, PointwiseConverter,
      PointwiseConverter, PointwiseConverter, PointwiseConverter,
      PointwiseConverter, PointwiseConverter, PointwiseConverter,
      PointwiseConverter, PointwiseConverter, PointwiseConverter,
      PointwiseConverter, PointwiseConverter, PointwiseConverter,
      PointwiseConverter, PointwiseConverter, PointwiseConverter,
      PointwiseConverter, PointwiseConverter, PointwiseConverter,
      PointwiseConverter, PointwiseConverter, PointwiseConverter,
      PointwiseConverter, PointwiseConverter,
      IdentityNConverter,
      ReduceConverter, ReduceConverter, ReduceConverter,
      ReduceConverter, ReduceConverter, ReduceConverter,
      ArgMaxConverter,
      ConcatConverter,
      ConvConverter, ConvConverter,
      TransposeConvConverter,
      GatherConverter,
      PadConverter,
      ReshapeConverter,
      RescaleConverter,
      ResizeConverter,
      ReverseConverter,
      TableConverter,
      TileConverter,
      TransposeConverter,
      MatMulConverter,
      Pool2dConverter, Pool2dConverter,
      FullyConnectedConverter>(patterns->getContext());
  // clang-format on
}
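// NOTE: The snippet below is only an illustrative sketch of how a conversion
// pass typically consumes the patterns registered above; the in-tree driver
// lives in TosaToLinalgPass.cpp, and the exact set of dialects marked legal
// here is an assumption.
//
//   void runOnFunction() override {
//     RewritePatternSet patterns(&getContext());
//     ConversionTarget target(getContext());
//     target.addLegalDialect<linalg::LinalgDialect, StandardOpsDialect,
//                            math::MathDialect, tensor::TensorDialect>();
//     target.addIllegalDialect<tosa::TosaDialect>();
//     mlir::tosa::populateTosaToLinalgOnTensorsConversionPatterns(&patterns);
//     if (failed(applyFullConversion(getFunction(), target,
//                                    std::move(patterns))))
//       signalPassFailure();
//   }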