# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name,arguments-differ,no-else-return,unused-argument,missing-docstring
"""
QNN pass transformation infrastructure.
"""
from tvm import relay


def CanonicalizeOps():
    """Build the pass that lowers QNN ops into core (non-dialect) Relay ops.

    Every QNN op in the expression is rewritten as a sequence of existing Relay ops, so
    the result contains no QNN dialect ops. The pass is target-independent. The lowering
    function for an op is registered under the ``FTVMQnnCanonicalize`` attr_name, which
    is used in place of the ``FTVMLegalize`` op attribute.

    Examples
    --------

    .. code-block:: python

        # Original expression
        qnn_expr = relay.qnn.op.requantize(y,
                                           input_scale=1,
                                           input_zero_point=0,
                                           output_scale=1,
                                           output_zero_point=0,
                                           out_dtype='int8')

        # Rather than supporting the QNN requantize op directly, convert it into a
        # sequence of existing Relay operators so that all of the existing Relay
        # infrastructure can be reused.
        mod = relay.Module.from_expr(qnn_expr)
        mod = relay.qnn.transform.CanonicalizeOps()(mod)
        relay_expr = mod['main']
        print(relay_expr)

        def @main(%quantized_data: Tensor[(200), int32]) -> Tensor[(200), int8] {
          %0 = cast(%quantized_data, dtype="int64") /* ty=Tensor[(200), int64] */;
          %1 = multiply(%0, 2 /* ty=int64 */) /* ty=Tensor[(200), int64] */;
          %2 = multiply(%1, 1073741824 /* ty=int64 */) /* ty=Tensor[(200), int64] */;
          %3 = add(%2, 1073741824 /* ty=int64 */) /* ty=Tensor[(200), int64] */;
          %4 = right_shift(%3, 31 /* ty=int64 */) /* ty=Tensor[(200), int64] */;
          %5 = add(0 /* ty=int64 */, %4) /* ty=Tensor[(200), int64] */;
          %6 = clip(%5, a_min=-128f, a_max=127f) /* ty=Tensor[(200), int64] */;
          cast(%6, dtype="int8") /* ty=Tensor[(200), int8] */
        }

    Returns
    -------
    ret : tvm.relay.Pass
        The registered pass that canonicalizes QNN ops to Relay ops.
    """
    # Reuse the generic Relay Legalize pass, pointed at the QNN canonicalization attribute.
    canonicalize_attr_name = "FTVMQnnCanonicalize"
    return relay.transform.Legalize(canonicalize_attr_name)


def Legalize():
    """Build the pass that legalizes QNN ops only.

    Unlike the Relay Legalize pass, this one touches only QNN ops. The legalization
    function for an op is registered under the ``FTVMQnnLegalize`` attr_name, which is
    used in place of the ``FTVMLegalize`` op attribute. Keeping QNN legalization apart
    from Relay legalization separates the two concerns. The legalization can be
    configured to happen per target.

    Examples
    --------

    Suppose the original graph is as follows::

            data(u8)  weight(u8)
                |       |
                |       |
               qnn.conv2d (int32)
                   |
                   |
                nn.relu (int32)

    Intel Cascade Lake has VNNI instructions that speed up convolution, but they only
    work on u8 x i8 inputs. QNN Legalize can therefore transform the graph above into::

            data(u8)  weight(u8)
               |          |
               |          |
               |     requantize(i8)
               |          |
               |          |
               qnn.conv2d (int32)
                   |
                   |
                 nn.relu (int32)

    Because legalization is isolated to QNN ops, the transformation is triggered only
    for qnn.conv2d and not for nn.relu. This pass can be followed by CanonicalizeOps to
    further lower qnn.requantize and qnn.conv2d into an expr containing only Relay ops.

    Returns
    -------
    ret : tvm.relay.Pass
        The registered pass that legalizes QNN ops.
    """
    # Reuse the generic Relay Legalize pass, pointed at the QNN legalization attribute.
    legalize_attr_name = "FTVMQnnLegalize"
    return relay.transform.Legalize(legalize_attr_name)