# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#pylint: disable=invalid-name
"""QNN dialect operators."""

from __future__ import absolute_import as _abs
from tvm.expr import FloatImm, IntImm
from tvm.relay.expr import Tuple
from . import _make


def requantize(data,
               input_scale,
               input_zero_point,
               output_scale,
               output_zero_point,
               rounding="UPWARD",
               out_dtype="int8"):
    r"""Requantized operator.

    The requantize operator converts one quantized tensor representation to
    another quantized tensor representation. For the output tensor, we are
    provided with output scale and zero point. The computation is as follows

    Q_output = zp_output + (scale_input)/(scale_output) * (Q_input - zp_input)

    Parameters
    ----------
    data : tvm.relay.Expr
        The input data to the operator.

    input_scale: float
        The quantization scale for the input tensor.

    input_zero_point: int
        The zero point of the input tensor.

    output_scale: float
        The quantization scale for the output tensor.

    output_zero_point: int
        The zero point of the output tensor.

    rounding : string, optional
        Defines the rounding direction when the value is midway between two
        representable values.

    out_dtype : str, optional
        Specifies the output data type.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """

    return _make.requantize(data,
                            input_scale,
                            input_zero_point,
                            output_scale,
                            output_zero_point,
                            rounding,
                            out_dtype)


def quantize(data,
             output_scale,
             output_zero_point,
             out_dtype='int8'):
    r""" Quantize op
    This operator takes float32 as input and produces quantized int8 or uint8 as output.
    The input tensor can be of any shape. The output shape is the same as input shape.

    Q_output = clamp((round(input_tensor/output_scale) + output_zero_point),
                     out_dtype::min,
                     out_dtype::max)

    Parameters
    ----------
    data : tvm.relay.Expr
        The input tensor to be quantized. Can be of type float32.
    output_scale : float
        The output scale.
    output_zero_point : int
        The output zero_point.
    out_dtype : str, optional
        The data type of the output tensor. Can be [int8, uint8]
    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """

    return _make.quantize(data,
                          output_scale,
                          output_zero_point,
                          out_dtype)


def dequantize(data,
               input_scale,
               input_zero_point):
    r""" Dequantize op
    This operator takes quantized int8 and uint8 as input and produces
    dequantized float32 as output. The output shape is the same as input shape. The input
    tensor can be of any shape.

    Parameters
    ----------
    data : tvm.relay.Expr
        The input tensor to be dequantized. Can be of type [int8, uint8].
    input_scale : float
        The input scale.
    input_zero_point : int
        The input zero_point.
    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """

    return _make.dequantize(data,
                            input_scale,
                            input_zero_point)


def concatenate(data,
                input_scales,
                input_zero_points,
                output_scale,
                output_zero_point,
                axis):
    """Concatenate the quantized input tensors along the given axis.

    Parameters
    ----------
    data : Union(List[relay.Expr], Tuple[relay.Expr])
        The list of quantized tensors.

    input_scales : List[float32]
        The list of scales of input quantized tensors.

    input_zero_points : List[int32]
        The list of zero points of input quantized tensors.

    output_scale : float32
        The scale of the output quantized tensor.

    output_zero_point : int32
        The zero point of the output quantized tensor.

    axis : int
        The axis along which the tensors are concatenated.

    Returns
    -------
    result: relay.Expr
        The concatenated quantized tensor.
    """

    data = list(data)
    if not data:
        raise ValueError("relay.concatenate requires data to be non-empty.")
    if not isinstance(axis, int):
        raise ValueError("For now, we only support integer axis")

    return _make.concatenate(Tuple(data),
                             [FloatImm("float64", x) for x in input_scales],
                             [IntImm("int32", x) for x in input_zero_points],
                             output_scale,
                             output_zero_point,
                             axis)


def conv2d(data,
           kernel,
           input_zero_point,
           kernel_zero_point,
           input_scale,
           kernel_scale,
           strides=(1, 1),
           padding=(0, 0),
           dilation=(1, 1),
           groups=1,
           channels=None,
           kernel_size=None,
           data_layout="NCHW",
           kernel_layout="OIHW",
           out_layout="",
           out_dtype="int32"):
    r"""Quantized 2D convolution.

    This operator convolves quantized data with quantized kernel. The scale of
    the output quantized tensor is the product of the kernel_scale and
    input_scale of the input quantized tensors. The zero point of the output
    quantized tensor is 0. By default, the dtype of output is int32. Please also
    refer to Requantize operator to understand how to scale back the int32
    output to (u)int8.

    Parameters
    ----------
    data : tvm.relay.Expr
        The input data to the operator.

    kernel : tvm.relay.Expr
        The kernel expressions.

    input_zero_point: int
        The zero point of the data distribution.

    kernel_zero_point: int
        The zero point of the quantized_kernel distribution.

    input_scale: float
        The scale for the input tensor. The scale for the input tensor is
        stored purely for convenience here. See more commentary below.

    kernel_scale: float
        The scale for the weight tensor. The scale for the weight tensor is
        stored for access to this during relay. This information is not
        needed in the pass pipeline after qnn.conv2d is lowered to the
        sequence of steps as in nn.conv2d. See also input_scale in Requantize.

    strides : tuple of int, optional
        The strides of convolution.

    padding : tuple of int, optional
        The padding of convolution on both sides of inputs before convolution.

    dilation : tuple of int, optional
        Specifies the dilation rate to be used for dilated convolution.

    groups : int, optional
        Number of groups for grouped convolution.

    channels : int, optional
        Number of output channels of this convolution.

    kernel_size : tuple of int, optional
        The spatial of the convolution kernel.

    data_layout : str, optional
        Layout of the input.

    kernel_layout : str, optional
        Layout of the kernel.

    out_layout : str, optional
        Layout of the output, by default, out_layout is the same as data_layout

    out_dtype : str, optional
        Specifies the output data type for mixed precision conv2d.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """

    return _make.conv2d(data, kernel,
                        input_zero_point, kernel_zero_point,
                        input_scale, kernel_scale,
                        strides, padding, dilation,
                        groups, channels, kernel_size,
                        data_layout, kernel_layout, out_layout, out_dtype)


def add(lhs,
        rhs,
        lhs_scale,
        lhs_zero_point,
        rhs_scale,
        rhs_zero_point,
        output_scale,
        output_zero_point):
    """Quantized addition with numpy-style broadcasting.

    Parameters
    ----------
    lhs : relay.Expr
        The left hand side quantized input data.

    rhs : relay.Expr
        The right hand side quantized input data.

    lhs_scale: float
        The scale of the lhs quantized expr.

    lhs_zero_point: int
        The zero point of lhs quantized expr.

    rhs_scale: float
        The scale of the rhs quantized expr.

    rhs_zero_point: int
        The zero point of rhs quantized expr.

    output_scale: float
        The scale of the output quantized expr.

    output_zero_point: int
        The zero point of output quantized expr.

    Returns
    -------
    result : relay.Expr
        The computed result.

    """
    return _make.add(lhs, rhs,
                     lhs_scale, lhs_zero_point,
                     rhs_scale, rhs_zero_point,
                     output_scale, output_zero_point)


def dense(data,
          weight,
          input_zero_point,
          kernel_zero_point,
          input_scale,
          kernel_scale,
          units=None,
          out_dtype="int32"):
    """Qnn Dense operator.
    Applies a quantized linear transformation

    .. math::

    `Y = X * W`

    Parameters
    ----------
    data : tvm.relay.Expr
        The quantized input data to the operator.
    weight : tvm.relay.Expr
        The quantized weight expressions.
    input_zero_point: int
        The input zero point.
    kernel_zero_point: int
        The kernel zero point.
    input_scale: float
        The scale for the input tensor.
    kernel_scale: float
        The scale for the weight tensor. The scale for the weight tensor is
        stored for access to this during relay. This information is not
        needed in the pass pipeline after qnn.conv2d is lowered to the
        sequence of steps as in nn.conv2d. See also input_scale in Requantize.
    units : int, optional
        Number of hidden units of the dense transformation.
    out_dtype : str, optional
        Specifies the output data type for mixed precision dense can be int32 or int16.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """

    return _make.dense(data,
                       weight,
                       input_zero_point,
                       kernel_zero_point,
                       input_scale,
                       kernel_scale,
                       units,
                       out_dtype)


def mul(lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point,
        output_scale, output_zero_point):
    """Quantized multiplication with numpy-style broadcasting.

    Parameters
    ----------
    lhs : relay.Expr
        The left hand side quantized input data.

    rhs : relay.Expr
        The right hand side quantized input data.

    lhs_scale: float
        The scale of the lhs quantized expr.

    lhs_zero_point: int
        The zero point of lhs quantized expr.

    rhs_scale: float
        The scale of the rhs quantized expr.

    rhs_zero_point: int
        The zero point of rhs quantized expr.

    output_scale: float
        The scale of the output quantized expr.

    output_zero_point: int
        The zero point of output quantized expr.

    Returns
    -------
    result : relay.Expr
        The computed result.

    """
    return _make.mul(lhs, rhs,
                     lhs_scale, lhs_zero_point,
                     rhs_scale, rhs_zero_point,
                     output_scale, output_zero_point)