1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17#pylint: disable=invalid-name
18"""QNN dialect operators."""
19
20from __future__ import absolute_import as _abs
21from tvm.expr import FloatImm, IntImm
22from tvm.relay.expr import Tuple
23from . import _make
24
def requantize(data,
               input_scale,
               input_zero_point,
               output_scale,
               output_zero_point,
               rounding="UPWARD",
               out_dtype="int8"):
    r"""Requantized operator.

    Converts a tensor from one quantized representation to another, given
    the scale and zero point of both representations. Conceptually:

    Q_output = zp_output +  (scale_input)/(scale_output) * (Q_input - zp_input)

    Parameters
    ----------
    data : tvm.relay.Expr
        The quantized input tensor.

    input_scale: float
        Quantization scale of the input tensor.

    input_zero_point: int
        Quantization zero point of the input tensor.

    output_scale: float
        Quantization scale of the output tensor.

    output_zero_point: int
        Quantization zero point of the output tensor.

    rounding : string, optional
        Rounding direction to use when a value falls midway between two
        representable values.

    out_dtype : str, optional
        The requested output data type.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """
    # Lower directly to the C++-registered qnn.requantize op.
    return _make.requantize(
        data, input_scale, input_zero_point,
        output_scale, output_zero_point,
        rounding, out_dtype)
77
78
def quantize(data,
             output_scale,
             output_zero_point,
             out_dtype='int8'):
    r""" Quantize op
    This operator takes float32 as input and produces quantized int8 or uint8 as output.
    The input tensor can be of any shape. The output shape is the same as input shape.

    Q_output = clamp((round(input_tensor/output_scale) + output_zero_point),
                     out_dtype::min,
                     out_dtype::max)

    Parameters
    ----------
    data : tvm.relay.Expr
        The input tensor to be quantized. Can be of type float32.
    output_scale : float
        The output scale.
    output_zero_point : int
        The output zero_point.
    out_dtype : str, optional
        The data type of the output tensor. Can be [int8, uint8]
    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """
    # Delegate to the C++-registered qnn.quantize op.
    return _make.quantize(data,
                          output_scale,
                          output_zero_point,
                          out_dtype)
111
112
def dequantize(data,
               input_scale,
               input_zero_point):
    r""" Dequantize op
    This operator takes quantized int8 and uint8 as input and produces
    dequantized float32 as output. The output shape is the same as input shape. The input
    tensor can be of any shape.

    Parameters
    ----------
    data : tvm.relay.Expr
        The input tensor to be dequantized. Can be of type [int8, uint8].
    input_scale : float
        The quantization scale of the input tensor.
    input_zero_point : int
        The quantization zero point of the input tensor.
    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """
    # Delegate to the C++-registered qnn.dequantize op.
    return _make.dequantize(data,
                            input_scale,
                            input_zero_point)
138
139
def concatenate(data,
                input_scales,
                input_zero_points,
                output_scale,
                output_zero_point,
                axis):
    """Concatenate the quantized input tensors along the given axis.

    Parameters
    ----------
    data : Union(List[relay.Expr], Tuple[relay.Expr])
        The list of quantized tensors.

    input_scales : List[float32]
        The list of scales of input quantized tensors.

    input_zero_points : List[int32]
        The list of zero points of input quantized tensors.

    output_scale : float32
        The scale of the output quantized tensor.

    output_zero_point : int32
        The zero point of the output quantized tensor.

    axis : int
        The axis along which the tensors are concatenated.

    Returns
    -------
    result: relay.Expr
        The concatenated quantized tensor.
    """
    # Validate the inputs before crossing into the FFI layer.
    tensors = list(data)
    if not tensors:
        raise ValueError("relay.concatenate requires data to be non-empty.")
    if not isinstance(axis, int):
        raise ValueError("For now, we only support integer axis")

    # Scales/zero points are wrapped as immediates so the C++ side
    # receives properly-typed scalar arrays.
    scale_imms = [FloatImm("float64", scale) for scale in input_scales]
    zp_imms = [IntImm("int32", zero_point) for zero_point in input_zero_points]
    return _make.concatenate(
        Tuple(tensors), scale_imms, zp_imms,
        output_scale, output_zero_point, axis)
186
187
def conv2d(data,
           kernel,
           input_zero_point,
           kernel_zero_point,
           input_scale,
           kernel_scale,
           strides=(1, 1),
           padding=(0, 0),
           dilation=(1, 1),
           groups=1,
           channels=None,
           kernel_size=None,
           data_layout="NCHW",
           kernel_layout="OIHW",
           out_layout="",
           out_dtype="int32"):
    r"""Quantized 2D convolution.

    This operator convolves quantized data with quantized kernel. The scale of
    the output quantized tensor is the product of the kernel_scale and
    input_scale of the input quantized tensors. The zero point of the output
    quantized tensor is 0. By default, the dtype of output is int32. Please also
    refer to Requantize operator to understand how to scale back the int32
    output to (u)int8.

    Parameters
    ----------
    data : tvm.relay.Expr
        The input data to the operator.

    kernel : tvm.relay.Expr
        The kernel expressions.

    input_zero_point: int
           The zero point of the data distribution.

    kernel_zero_point: int
           The zero point of the quantized_kernel distribution.

    input_scale: float
           The scale for the input tensor. The scale for the input tensor is
           stored purely for convenience here. See more commentary below.

    kernel_scale: float
           The scale for the weight tensor. The scale for the weight tensor is
           stored for access to this during relay. This information is not
           needed in the pass pipeline after qnn.conv2d is lowered to the
           sequence of steps as in nn.conv2d. See also input_scale in Requantize.

    strides : tuple of int, optional
        The strides of convolution.

    padding : tuple of int, optional
        The padding of convolution on both sides of inputs before convolution.

    dilation : tuple of int, optional
        Specifies the dilation rate to be used for dilated convolution.

    groups : int, optional
        Number of groups for grouped convolution.

    channels : int, optional
        Number of output channels of this convolution.

    kernel_size : tuple of int, optional
        The spatial of the convolution kernel.

    data_layout : str, optional
        Layout of the input.

    kernel_layout : str, optional
        Layout of the kernel.

    out_layout : str, optional
        Layout of the output, by default, out_layout is the same as data_layout

    out_dtype : str, optional
        Specifies the output data type for mixed precision conv2d.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """
    # Delegate to the C++-registered qnn.conv2d op.
    return _make.conv2d(data, kernel,
                        input_zero_point, kernel_zero_point,
                        input_scale, kernel_scale,
                        strides, padding, dilation,
                        groups, channels, kernel_size,
                        data_layout, kernel_layout, out_layout, out_dtype)
279
280
def add(lhs,
        rhs,
        lhs_scale,
        lhs_zero_point,
        rhs_scale,
        rhs_zero_point,
        output_scale,
        output_zero_point):
    """Quantized addition with numpy-style broadcasting.

    Parameters
    ----------
    lhs : relay.Expr
        The left hand side quantized input data.

    rhs : relay.Expr
        The right hand side quantized input data.

    lhs_scale: float
        The scale of the lhs quantized expr.

    lhs_zero_point: int
       The zero point of lhs quantized expr.

    rhs_scale: float
        The scale of the rhs quantized expr.

    rhs_zero_point: int
       The zero point of rhs quantized expr.

    output_scale: float
        The scale of the output quantized expr.

    output_zero_point: int
       The zero point of output quantized expr.

    Returns
    -------
    result : relay.Expr
        The computed result.

    """
    # Lower directly to the C++-registered qnn.add op.
    return _make.add(
        lhs, rhs,
        lhs_scale, lhs_zero_point,
        rhs_scale, rhs_zero_point,
        output_scale, output_zero_point)
327
328
def dense(data,
          weight,
          input_zero_point,
          kernel_zero_point,
          input_scale,
          kernel_scale,
          units=None,
          out_dtype="int32"):
    """Qnn Dense operator.
    Applies a quantized linear transformation

     .. math::

     `Y = X * W`

    Parameters
    ----------
    data : tvm.relay.Expr
        The quantized input data to the operator.
    weight : tvm.relay.Expr
        The quantized weight expressions.
    input_zero_point: int
        The input zero point.
    kernel_zero_point: int
        The kernel zero point.
    input_scale: float
        The scale for the input tensor.
    kernel_scale: float
        The scale for the weight tensor. The scale for the weight tensor is
        stored for access to this during relay. This information is not
        needed in the pass pipeline after qnn.dense is lowered to the
        sequence of steps as in nn.dense. See also input_scale in Requantize.
    units : int, optional
        Number of hidden units of the dense transformation.
    out_dtype : str, optional
        Specifies the output data type for mixed precision dense can be int32 or int16.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """
    # Delegate to the C++-registered qnn.dense op.
    return _make.dense(data,
                       weight,
                       input_zero_point,
                       kernel_zero_point,
                       input_scale,
                       kernel_scale,
                       units,
                       out_dtype)
380
381
def mul(lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point,
        output_scale, output_zero_point):
    """Quantized multiplication with numpy-style broadcasting.

    Parameters
    ----------
    lhs : relay.Expr
        The left hand side quantized input data.

    rhs : relay.Expr
        The right hand side quantized input data.

    lhs_scale: float
        The scale of the lhs quantized expr.

    lhs_zero_point: int
       The zero point of lhs quantized expr.

    rhs_scale: float
        The scale of the rhs quantized expr.

    rhs_zero_point: int
       The zero point of rhs quantized expr.

    output_scale: float
        The scale of the output quantized expr.

    output_zero_point: int
       The zero point of output quantized expr.

    Returns
    -------
    result : relay.Expr
        The computed result.

    """
    # Lower directly to the C++-registered qnn.mul op.
    return _make.mul(
        lhs, rhs,
        lhs_scale, lhs_zero_point,
        rhs_scale, rhs_zero_point,
        output_scale, output_zero_point)
422