1// RUN: mlir-opt --split-input-file --tosa-to-linalg-on-tensors %s -verify-diagnostics -o -| FileCheck %s
2
// Lowering of tosa.abs on a rank-0 (scalar) tensor: the resulting
// linalg.generic has an empty iteration space and nullary identity maps.
// CHECK: #[[$MAP0:.*]] = affine_map<() -> ()>

// CHECK-LABEL: @test_abs
func @test_abs(%arg0: tensor<f32>) -> tensor<f32> {
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [] : tensor<f32>
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins(%arg0 : tensor<f32>) outs([[INIT]] : tensor<f32>) {
  // CHECK: ^bb0(%arg1: f32, %arg2: f32):
  // CHECK:   [[ELEMENT:%.+]] = absf %arg1
  // CHECK:   linalg.yield [[ELEMENT]] : f32
  // CHECK: } -> tensor<f32>

  %0 = "tosa.abs"(%arg0) : (tensor<f32>) -> tensor<f32>

  // CHECK: return [[GENERIC]]
  return %0 : tensor<f32>
}
19
20// -----
21
// Rank-1 static-shape abs: one parallel loop with the 1-D identity map.
// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>

// CHECK-LABEL: @test_abs
func @test_abs(%arg0: tensor<2xf32>) -> tensor<2xf32> {
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2] : tensor<2xf32>
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) {
  // CHECK: ^bb0(%arg1: f32, %arg2: f32):
  // CHECK:   [[ELEMENT:%.+]] = absf %arg1
  // CHECK:   linalg.yield [[ELEMENT]] : f32
  // CHECK: } -> tensor<2xf32>
  %0 = "tosa.abs"(%arg0) : (tensor<2xf32>) -> tensor<2xf32>

  // CHECK: return [[GENERIC]]
  return %0 : tensor<2xf32>
}
37
38// -----
39
// Rank-2 static-shape abs: two parallel loops with the 2-D identity map.
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>

// CHECK-LABEL: @test_abs
func @test_abs(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> {
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2, 3] : tensor<2x3xf32>
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<2x3xf32>) outs([[INIT]] : tensor<2x3xf32>) {
  // CHECK: ^bb0(%arg1: f32, %arg2: f32):
  // CHECK:   [[ELEMENT:%.+]] = absf %arg1
  // CHECK:   linalg.yield [[ELEMENT]] : f32
  // CHECK: } -> tensor<2x3xf32>
  %0 = "tosa.abs"(%arg0) : (tensor<2x3xf32>) -> tensor<2x3xf32>

  // CHECK: return [[GENERIC]]
  return %0 : tensor<2x3xf32>
}
55
56// -----
57
// Fully dynamic rank-1 abs: the dynamic extent must be read back with
// tensor.dim and fed to linalg.init_tensor.
// CHECK-LABEL: @test_abs
func @test_abs(%arg0: tensor<?xf32>) -> tensor<?xf32> {
  // CHECK: %[[C0:.+]] = constant 0
  // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]]]
  // CHECK: linalg.generic
  // CHECK: absf
  %0 = "tosa.abs"(%arg0) : (tensor<?xf32>) -> tensor<?xf32>
  return %0 : tensor<?xf32>
}
68
69// -----
70
// Mixed static/dynamic shape: only the dynamic dimension (index 1) is
// queried; the static extent stays a literal in init_tensor.
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>

// CHECK-LABEL: @test_abs_dyn
func @test_abs_dyn(%arg0: tensor<2x?xf32>) -> tensor<2x?xf32> {
  // CHECK: %[[C1:.+]] = constant 1
  // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C1]]
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, %[[DIM]]]
  // CHECK: linalg.generic
  // CHECK: absf
  %0 = "tosa.abs"(%arg0) : (tensor<2x?xf32>) -> tensor<2x?xf32>
  return %0 : tensor<2x?xf32>
}
83// -----
84
85
// Broadcast of a size-1 lhs: the 1-element operand is collapsed to a
// rank-0 tensor and indexed with the constant () map inside the generic.
// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> ()>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0) -> (d0)>

// CHECK-LABEL: @test_broadcast
func @test_broadcast(%arg0: tensor<1xf32>, %arg1: tensor<2xf32>) -> tensor<2xf32> {
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2] : tensor<2xf32>
  // CHECK: [[RESHAPE:%.+]] = linalg.tensor_collapse_shape %arg0
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins([[RESHAPE]], %arg1 : tensor<f32>, tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) {
  // CHECK: ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
  // CHECK:   [[ELEMENT:%.+]] = addf %arg2, %arg3 : f32
  // CHECK:   linalg.yield [[ELEMENT]] : f32
  // CHECK: } -> tensor<2xf32>
  %0 = "tosa.add"(%arg0, %arg1) : (tensor<1xf32>, tensor<2xf32>) -> tensor<2xf32>
  return %0 : tensor<2xf32>
}
101
102// -----
103
// Same as @test_broadcast but with the broadcast operand on the rhs: the
// collapse and the () map attach to %arg1 instead of %arg0.
// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0) -> ()>

// CHECK-LABEL: @test_broadcast_swapped_args
func @test_broadcast_swapped_args(%arg0: tensor<2xf32>, %arg1: tensor<1xf32>) -> tensor<2xf32> {
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2] : tensor<2xf32>
  // CHECK: [[RESHAPE:%.+]] = linalg.tensor_collapse_shape %arg1
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0, [[RESHAPE]] : tensor<2xf32>, tensor<f32>) outs([[INIT]] : tensor<2xf32>) {
  // CHECK: ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
  // CHECK:   [[ELEMENT:%.+]] = addf %arg2, %arg3 : f32
  // CHECK:   linalg.yield [[ELEMENT]] : f32
  // CHECK: } -> tensor<2xf32>
  %0 = "tosa.add"(%arg0, %arg1) : (tensor<2xf32>, tensor<1xf32>) -> tensor<2xf32>
  return %0 : tensor<2xf32>
}
119
120// -----
121
// Both operands broadcast along a different axis: each size-1 dimension is
// collapsed away and replaced by a projection map (d1) resp. (d0).
// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0)>

// CHECK-LABEL: @test_multibroadcast
func @test_multibroadcast(%arg0: tensor<1x3xf32>, %arg1: tensor<2x1xf32>) -> tensor<2x3xf32> {
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2, 3] : tensor<2x3xf32>
  // CHECK: [[RESHAPE1:%.+]] = linalg.tensor_collapse_shape %arg0 {{\[}}[0, 1]]
  // CHECK: [[RESHAPE2:%.+]] = linalg.tensor_collapse_shape %arg1 {{\[}}[0, 1]]
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins([[RESHAPE1]], [[RESHAPE2]] : tensor<3xf32>, tensor<2xf32>) outs([[INIT]] : tensor<2x3xf32>) {
  // CHECK: ^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
  // CHECK:   [[ELEMENT:%.+]] = addf %arg2, %arg3 : f32
  // CHECK:   linalg.yield [[ELEMENT]] : f32
  // CHECK: } -> tensor<2x3xf32>
  %0 = "tosa.add"(%arg0, %arg1) : (tensor<1x3xf32>, tensor<2x1xf32>) -> tensor<2x3xf32>
  return %0 : tensor<2x3xf32>
}
139
140// -----
141
// Smoke test for every f32 elementwise TOSA op: each lowers to a
// linalg.generic; only the scalar op(s) inside the region are spot-checked,
// not the full generic signature.
// CHECK-LABEL: @test_simple_f32
func @test_simple_f32(%arg0: tensor<1xf32>) -> () {
  // CHECK: linalg.generic
  // CHECK: tanh
  %0 = "tosa.tanh"(%arg0) : (tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: absf
  %1 = "tosa.abs"(%arg0) : (tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: addf
  %2 = "tosa.add"(%0, %0) : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: subf
  %3 = "tosa.sub"(%0, %1) : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: mulf
  %4 = "tosa.mul"(%0, %1) {shift = 0 : i32} : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: negf
  %5 = "tosa.negate"(%0) : (tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: pow
  %6 = "tosa.pow"(%1, %2) : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: rsqrt
  %7 = "tosa.rsqrt"(%1) : (tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: log
  %8 = "tosa.log"(%arg0) : (tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: exp
  %9 = "tosa.exp"(%arg0) : (tensor<1xf32>) -> tensor<1xf32>

  // Comparisons lower to cmpf; the predicate is not pinned here.
  // CHECK: linalg.generic
  // CHECK: cmpf
  %10 = "tosa.greater"(%0, %1) : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xi1>

  // CHECK: linalg.generic
  // CHECK: cmpf
  %11 = "tosa.greater_equal"(%0, %1) : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xi1>

  // CHECK: linalg.generic
  // CHECK: cmpf
  %12 = "tosa.equal"(%0, %1) : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xi1>

  // CHECK: linalg.generic
  // CHECK: select
  %13 = "tosa.select"(%10, %0, %1) : (tensor<1xi1>, tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>

  // min/max/clamp/reluN all lower to compare-and-select chains.
  // CHECK: linalg.generic
  // CHECK: cmpf
  // CHECK: select
  %14 = "tosa.maximum"(%0, %1) : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: cmpf
  // CHECK: select
  %15 = "tosa.minimum"(%0, %1) : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: ceil
  %16 = "tosa.ceil"(%0) : (tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: floor
  %17 = "tosa.floor"(%0) : (tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: cmpf
  // CHECK: select
  %18 = "tosa.clamp"(%0) {min_int = 1 : i64, max_int = 5 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xf32>) -> tensor<1xf32>

  // CHECK: linalg.generic
  // CHECK: cmpf
  // CHECK: select
  %19 = "tosa.reluN"(%0) {max_int = 5 : i64, max_fp = 5.0 : f32} : (tensor<1xf32>) -> tensor<1xf32>

  // sigmoid is expanded to 1 / (1 + exp(-x)).
  // CHECK: linalg.generic
  // CHECK: negf
  // CHECK: exp
  // CHECK: addf
  // CHECK: divf
  %20 = "tosa.sigmoid"(%0) : (tensor<1xf32>) -> tensor<1xf32>

  // f32 -> i32 cast: round-half-away-from-zero, clamp to the i32 range,
  // then fptosi.
  // CHECK: linalg.generic
  // CHECK: constant 0.000000e+00
  // CHECK: constant 5.000000e-01
  // CHECK: constant -2.14748365E+9
  // CHECK: constant 2.14748365E+9
  // CHECK: addf
  // CHECK: subf
  // CHECK: cmpf olt
  // CHECK: select
  // CHECK: cmpf olt
  // CHECK: select
  // CHECK: cmpf olt
  // CHECK: select
  // CHECK: fptosi
  %21 = "tosa.cast"(%0) : (tensor<1xf32>) -> tensor<1xi32>

  // f32 -> i1 cast: compare against zero.
  // CHECK: linalg.generic
  // CHECK: constant 0
  // CHECK: cmpf
  %22 = "tosa.cast"(%0) : (tensor<1xf32>) -> tensor<1xi1>

  // CHECK: linalg.generic
  // CHECK: fptrunc
  %23 = "tosa.cast"(%0) : (tensor<1xf32>) -> tensor<1xf16>

  // CHECK: linalg.generic
  // CHECK: divf
  %24 = "tosa.reciprocal"(%0) : (tensor<1xf32>) -> tensor<1xf32>

  return
}
266
267// -----
268
// f16 -> f32 cast lowers to a widening fpext inside the generic.
// CHECK-LABEL: @test_simple_f16
func @test_simple_f16(%arg0: tensor<1xf16>) -> () {

  // CHECK: linalg.generic
  // CHECK: fpext
  %0 = "tosa.cast"(%arg0) : (tensor<1xf16>) -> tensor<1xf32>

  return
}
278
279// -----
280
// i16 x i16 -> i32 multiply: both operands are sign-extended to the result
// width before the muli.
// CHECK-LABEL: @test_simple_i16
func @test_simple_i16(%arg0: tensor<1xi16>) -> () {
  // CHECK: linalg.generic
  // CHECK: sext
  // CHECK: sext
  // CHECK: muli
  %0 = "tosa.mul"(%arg0, %arg0) {shift = 0 : i32} : (tensor<1xi16>, tensor<1xi16>) -> tensor<1xi32>

  return
}
291
292// -----
293
// Smoke test for every i32 elementwise TOSA op; as in @test_simple_f32 only
// the scalar ops inside the lowered regions are spot-checked.
// CHECK-LABEL: @test_simple_i32
func @test_simple_i32(%arg0: tensor<1xi32>) -> () {
  // CHECK: linalg.generic
  // CHECK: addi
  %0 = "tosa.add"(%arg0, %arg0) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: subi
  %1 = "tosa.sub"(%arg0, %arg0) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: muli
  %2 = "tosa.mul"(%arg0, %arg0) {shift = 0 : i32} : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // A non-zero shift routes the multiply through tosa.apply_scale.
  // CHECK: linalg.generic
  // CHECK: constant 2
  // CHECK: apply_scale
  %3 = "tosa.mul"(%arg0, %arg0) {shift = 2 : i32} : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: divi
  %4 = "tosa.div"(%arg0, %arg0) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // Integer negate is expressed as 0 - x.
  // CHECK: linalg.generic
  // CHECK: [[ZERO:%.+]] = constant 0
  // CHECK: subi [[ZERO]], %arg1
  %5 = "tosa.negate"(%arg0) : (tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: and
  %6 = "tosa.bitwise_and"(%arg0, %arg0) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: or
  %7 = "tosa.bitwise_or"(%arg0, %arg0) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: xor
  %8 = "tosa.bitwise_xor"(%arg0, %arg0) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: shift_left
  %9 = "tosa.logical_left_shift"(%arg0, %arg0) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: shift_right_unsigned
  %10 = "tosa.logical_right_shift"(%arg0, %arg0) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: shift_right_signed
  %11 = "tosa.arithmetic_right_shift"(%arg0, %arg0) {round = 0 : i1} : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // With round = 1 the shift additionally computes a rounding bit that is
  // added to the shifted result.
  // CHECK: linalg.generic
  // CHECK: constant 1
  // CHECK: constant 0
  // CHECK: constant true
  // CHECK: cmpi
  // CHECK: subi
  // CHECK: shift_right_signed
  // CHECK: trunci
  // CHECK: and
  // CHECK: and
  // CHECK: zexti
  // CHECK: addi
  %12 = "tosa.arithmetic_right_shift"(%arg0, %arg0) {round = 1 : i1} : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // clz lowers to an scf.while loop rather than a single scalar op.
  // CHECK: scf.while
  // CHECK: cmpi ne
  // CHECK: scf.condition
  // CHECK: shift_right_unsigned
  // CHECK: subi
  // CHECK: scf.yield
  %13 = "tosa.clz"(%arg0) : (tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: cmpi
  %14 = "tosa.greater"(%0, %1) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi1>

  // CHECK: linalg.generic
  // CHECK: cmpi
  %15 = "tosa.greater_equal"(%0, %1) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi1>

  // CHECK: linalg.generic
  // CHECK: select
  %16 = "tosa.select"(%14, %0, %1) : (tensor<1xi1>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: cmpi
  // CHECK: select
  %17 = "tosa.maximum"(%0, %1) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: cmpi
  // CHECK: select
  %18 = "tosa.minimum"(%0, %1) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: cmpi
  // CHECK: select
  %19 = "tosa.clamp"(%0) {min_int = 1 : i64, max_int = 5 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xi32>) -> tensor<1xi32>

  // CHECK: linalg.generic
  // CHECK: cmpi
  // CHECK: select
  %20 = "tosa.reluN"(%0) {max_int = 5 : i64, max_fp = 5.0 : f32} : (tensor<1xi32>) -> tensor<1xi32>

  // Narrowing i32 -> i16 cast: clamp to the i16 range, then truncate.
  // CHECK: linalg.generic
  // CHECK: constant -32768
  // CHECK: constant 32767
  // CHECK: cmpi slt
  // CHECK: select
  // CHECK: cmpi slt
  // CHECK: select
  // CHECK: trunci
  %21 = "tosa.cast"(%0) : (tensor<1xi32>) -> tensor<1xi16>

  // CHECK: linalg.generic
  // CHECK: sexti
  %22 = "tosa.cast"(%0) : (tensor<1xi32>) -> tensor<1xi64>

  // CHECK: linalg.generic
  // CHECK: constant 0
  // CHECK: cmpi
  %23 = "tosa.cast"(%0) : (tensor<1xi32>) -> tensor<1xi1>

  // CHECK: linalg.generic
  // CHECK: sitofp
  %24 = "tosa.cast"(%0) : (tensor<1xi32>) -> tensor<1xf32>

  // Integer abs: select between x and 0 - x based on the sign.
  // CHECK: linalg.generic
  // CHECK: constant 0
  // CHECK: cmpi sgt
  // CHECK: subi
  // CHECK: select
  %25 = "tosa.abs"(%arg0) : (tensor<1xi32>) -> tensor<1xi32>

  return
}
432
433// -----
434
// Unsigned ui8 -> f32 cast lowers to uitofp (not sitofp).
// CHECK-LABEL: @test_simple_ui8
func @test_simple_ui8(%arg0: tensor<1xui8>) -> () {

  // CHECK: linalg.generic
  // CHECK: uitofp
  %0 = "tosa.cast"(%arg0) : (tensor<1xui8>) -> tensor<1xf32>

  return
}
444
445// -----
446
// i8 clamp: the in-range bounds are used verbatim, while out-of-range
// bounds (second clamp: -130/130) are tightened to the i8 range -128/127.
// CHECK-LABEL: @test_i8
func @test_i8(%arg0: tensor<1xi8>) -> () {
  // CHECK: linalg.generic
  // CHECK-DAG: %[[C127:.+]] = constant -127
  // CHECK-DAG: %[[C126:.+]] = constant 126
  // CHECK-DAG: %[[CMP1:.+]] = cmpi slt, %arg1, %[[C127]]
  // CHECK-DAG: %[[SEL1:.+]] = select %[[CMP1]], %[[C127]]
  // CHECK-DAG: %[[CMP2:.+]] = cmpi slt, %[[C126]], %arg1
  // CHECK: %[[SEL2:.+]] = select %[[CMP2]], %[[C126]], %[[SEL1]]
  %0 = "tosa.clamp"(%arg0) {min_int = -127 : i64, max_int = 126 : i64, min_fp = 0.0 : f32, max_fp = 0.0 : f32} : (tensor<1xi8>) -> tensor<1xi8>

  // CHECK: linalg.generic
  // CHECK-DAG: %[[C128:.+]] = constant -128
  // CHECK-DAG: %[[C127:.+]] = constant 127
  // CHECK-DAG: %[[CMP1:.+]] = cmpi slt, %arg1, %[[C128]]
  // CHECK-DAG: %[[SEL1:.+]] = select %[[CMP1]], %[[C128]]
  // CHECK-DAG: %[[CMP2:.+]] = cmpi slt, %[[C127]], %arg1
  // CHECK: %[[SEL2:.+]] = select %[[CMP2]], %[[C127]], %[[SEL1]]
  %1 = "tosa.clamp"(%arg0) {min_int = -130 : i64, max_int = 130 : i64, min_fp = 0.0 : f32, max_fp = 0.0 : f32} : (tensor<1xi8>) -> tensor<1xi8>

  return
}
469
470// -----
471
// Boolean logical ops map to the i1 bitwise ops; logical_not is expressed
// as xor with the constant true.
// CHECK-LABEL: @test_bool
func @test_bool(%arg0: tensor<1xi1>, %arg1: tensor<1xi1>) -> () {
  // CHECK: linalg.generic
  // CHECK: and
  %0 = "tosa.logical_and"(%arg0, %arg1) : (tensor<1xi1>, tensor<1xi1>) -> tensor<1xi1>

  // CHECK: linalg.generic
  // CHECK: or
  %1 = "tosa.logical_or"(%arg0, %arg1) : (tensor<1xi1>, tensor<1xi1>) -> tensor<1xi1>

  // CHECK: linalg.generic
  // CHECK: xor
  %2 = "tosa.logical_xor"(%arg0, %arg1) : (tensor<1xi1>, tensor<1xi1>) -> tensor<1xi1>

  // CHECK: linalg.generic
  // CHECK: constant true
  // CHECK: xor
  %3 = "tosa.logical_not"(%arg0) : (tensor<1xi1>) -> tensor<1xi1>

  return
}
493
494// -----
495
// Quantized i8 negate: computed in a wider intermediate type, then clamped
// back to i8. The intermediate width depends on the zero-point magnitude:
// i16 while zp + 128 fits 16 bits (zp <= 32639 here), i32 beyond that.
// CHECK-LABEL: @test_negate_quantized
func @test_negate_quantized(%arg0: tensor<1xi8>) -> () {
  // CHECK: linalg.generic
  // CHECK: [[ZERO:%.+]] = constant 0
  // CHECK: [[EXT:%.+]] = sexti %arg1 : i8 to i16
  // CHECK: [[SUB:%.+]] = subi [[ZERO]], [[EXT]]
  // CHECK: [[MIN:%.+]] = constant -128
  // CHECK: [[MAX:%.+]] = constant 127
  // CHECK: [[PRED1:%.+]] = cmpi slt, [[SUB]], [[MIN]]
  // CHECK: [[LBOUND:%.+]] = select [[PRED1]], [[MIN]], [[SUB]]
  // CHECK: [[PRED2:%.+]] = cmpi slt, [[MAX]], [[SUB]]
  // CHECK: [[UBOUND:%.+]] = select [[PRED2]], [[MAX]], [[LBOUND]]
  // CHECK: [[TRUNC:%.+]] = trunci [[UBOUND]]
  // CHECK: linalg.yield [[TRUNC]]
  %0 = "tosa.negate"(%arg0) {quantization_info = { input_zp = 0 : i32, output_zp = 0 : i32}} : (tensor<1xi8>) -> tensor<1xi8>

  // CHECK: linalg.generic
  // CHECK: [[EXT:%.+]] = sexti %arg1 : i8 to i16
  %1 = "tosa.negate"(%arg0) {quantization_info = { input_zp = 32639 : i32, output_zp = 0 : i32}} : (tensor<1xi8>) -> tensor<1xi8>

  // CHECK: linalg.generic
  // CHECK: [[EXT:%.+]] = sexti %arg1 : i8 to i32
  %2 = "tosa.negate"(%arg0) {quantization_info = { input_zp = 32640 : i32, output_zp = 0 : i32}} : (tensor<1xi8>) -> tensor<1xi8>

  return
}
522
523// -----
524
// Rank-lowering reshape lowers to a single tensor_collapse_shape.
// CHECK-LABEL: @test_reshape_downrank
func @test_reshape_downrank(%arg0: tensor<2x3xf32>) -> tensor<6xf32> {
  // CHECK: [[RESHAPE:%.+]] = linalg.tensor_collapse_shape %arg0 {{\[}}[0, 1]]
  %0 = "tosa.reshape"(%arg0) {new_shape = [6]} : (tensor<2x3xf32>) -> tensor<6xf32>
  // CHECK: return [[RESHAPE]]
  return %0 : tensor<6xf32>
}
532
533// -----
534
// Rank-raising reshape lowers to a single tensor_expand_shape.
// CHECK-LABEL: @test_reshape_uprank
func @test_reshape_uprank(%arg0: tensor<6xf32>) -> tensor<2x3xf32> {
  // CHECK: [[RESHAPE:%.+]] = linalg.tensor_expand_shape %arg0 {{\[}}[0, 1]]
  %0 = "tosa.reshape"(%arg0) {new_shape = [2, 3]} : (tensor<6xf32>) -> tensor<2x3xf32>
  // CHECK: return [[RESHAPE]]
  return %0 : tensor<2x3xf32>
}
542
543// -----
544
// Same-rank reshape with different extents: lowered as collapse to rank 1
// followed by an expand to the new shape.
// CHECK-LABEL: @test_reshape_samerank
func @test_reshape_samerank(%arg0: tensor<3x2xf32>) -> tensor<2x3xf32> {
  // CHECK-SAME: (%[[ARG0:.*]]: tensor<3x2xf32>)
  // CHECK-NEXT: %[[RESHAPE1:.*]] = linalg.tensor_collapse_shape %[[ARG0]] {{\[}}[0, 1]]
  // CHECK-NEXT: %[[RESHAPE2:.*]] = linalg.tensor_expand_shape %[[RESHAPE1]] {{\[}}[0, 1]]
  %0 = "tosa.reshape"(%arg0) {new_shape = [2, 3]} : (tensor<3x2xf32>) -> tensor<2x3xf32>
  // CHECK-NEXT: return %[[RESHAPE2]]
  return %0 : tensor<2x3xf32>
}
554
555// -----
556
// 6-D to 3-D reshape: dimensions are grouped into reassociation sets
// [0,1,2], [3], [4,5] by the collapse.
// CHECK-LABEL: @test_reshape_downrank_6D
func @test_reshape_downrank_6D(%arg0: tensor<1x2x3x5x7x11xf32>) -> tensor<6x5x77xf32> {
  // CHECK: linalg.tensor_collapse_shape %arg0 {{\[}}[0, 1, 2], [3], [4, 5]]
  %0 = "tosa.reshape"(%arg0) {new_shape = [6, 5, 77]} : (tensor<1x2x3x5x7x11xf32>) -> tensor<6x5x77xf32>
  return %0 : tensor<6x5x77xf32>
}
563
564// -----
565
// tosa.identity folds away completely: the arguments are returned directly.
// CHECK-LABEL: @test_identity
func @test_identity(%arg0: tensor<1xf32>, %arg1: tensor<1xi32>) -> (tensor<1xf32>, tensor<1xi32>) {
  %0 = "tosa.identity"(%arg0) : (tensor<1xf32>) -> tensor<1xf32>
  %1 = "tosa.identity"(%arg1) : (tensor<1xi32>) -> tensor<1xi32>

  // CHECK: return %arg0, %arg1
  return %0, %1 : tensor<1xf32>, tensor<1xi32>
}
574
575// -----
576
// Static transpose with perms [1, 2, 0]: lowered to a copy-like generic
// whose input map permutes the loop dims accordingly.
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>

// CHECK-LABEL: @test_transpose
// CHECK-SAME: ([[ARG0:%.+]]: tensor<1x2x3xi32>)
func @test_transpose(%arg0: tensor<1x2x3xi32>) -> () {
  %0 = constant dense<[1, 2, 0]> : tensor<3xi32>
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2, 3, 1]
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel"]} ins([[ARG0]] : tensor<1x2x3xi32>) outs([[OUT:%.+]] : tensor<2x3x1xi32>)
  // CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32)
  // CHECK:   linalg.yield [[ARG1]]
  // CHECK: }
  %1 = "tosa.transpose"(%arg0, %0) : (tensor<1x2x3xi32>, tensor<3xi32>) -> (tensor<2x3x1xi32>)
  return
}
592
593// -----
594
// Transpose with one dynamic dim: the dynamic extent (input dim 1) is read
// with tensor.dim and lands in the permuted position of the init tensor.
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>

// CHECK-LABEL: @test_transpose_dyn
// CHECK-SAME: (%[[ARG0:.+]]: tensor<1x?x3x4xi32>)
func @test_transpose_dyn(%arg0: tensor<1x?x3x4xi32>) -> () {
  %0 = constant dense<[1, 3, 0, 2]> : tensor<4xi32>
  // CHECK: %[[C1:.+]] = constant 1
  // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C1]]
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 4, 1, 3]
  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x3x4xi32>) outs([[OUT:%.+]] : tensor<?x4x1x3xi32>)
  // CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32)
  // CHECK:   linalg.yield [[ARG1]]
  // CHECK: }
  %1 = "tosa.transpose"(%arg0, %0) : (tensor<1x?x3x4xi32>, tensor<4xi32>) -> (tensor<?x4x1x3xi32>)
  return
}
612
613// -----
614
// Transpose where every dimension is dynamic: both extents are queried and
// the init tensor is built with the dims swapped.
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1, d0)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)>

// NOTE: the label previously read @test_transpose_dyn (the previous test's
// name) and only matched as a substring of the real symbol; use the exact
// function name so the labels in this file are unique.
// CHECK-LABEL: @test_transpose_dyn_multiple
// CHECK-SAME: (%[[ARG0:.+]]: tensor<?x?xf32>)
func @test_transpose_dyn_multiple(%arg0: tensor<?x?xf32>) -> () {
  %0 = constant dense<[1, 0]> : tensor<2xi32>
  // CHECK: %[[C0:.+]] = constant 0
  // CHECK: %[[DIM0:.+]] = tensor.dim %arg0, %[[C0]]
  // CHECK: %[[C1:.+]] = constant 1
  // CHECK: %[[DIM1:.+]] = tensor.dim %arg0, %[[C1]]
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM1]], %[[DIM0]]]
  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x?xf32>) outs([[OUT:%.+]] : tensor<?x?xf32>)
  // CHECK: ^bb0([[ARG1:%.+]]: f32, [[ARG2:%.+]]: f32)
  // CHECK:   linalg.yield [[ARG1]]
  // CHECK: }
  %1 = "tosa.transpose"(%arg0, %0) : (tensor<?x?xf32>, tensor<2xi32>) -> (tensor<?x?xf32>)
  return
}
634
635// -----
636
// Float reductions: fill the init tensor with the op's identity value
// (0 for sum, 1 for prod, +/-FLT_MAX for min/max), reduce into the
// lower-rank tensor, then expand_shape back to keep the unit axis.
// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0)>

// CHECK-LABEL: @reduce_float
// CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xf32>
func @reduce_float(%arg0: tensor<5x4xf32>) -> () {
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [4]
  // CHECK: [[CST0:%.+]] = constant 0.0
  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<4xf32>)
  // CHECK: ^bb0(%arg1: f32, %arg2: f32)
  // CHECK:   [[RES:%.+]] = addf %arg1, %arg2 : f32
  // CHECK:   linalg.yield [[RES]] : f32
  // CHECK: linalg.tensor_expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<4xf32> into tensor<1x4xf32>
  %0 = "tosa.reduce_sum"(%arg0) {axis = 0 : i64} : (tensor<5x4xf32>) -> tensor<1x4xf32>

  // Reducing axis 1 flips the iterator_types and uses the (d0) result map.
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [5]
  // CHECK: [[CST0:%.+]] = constant 0.0
  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<5xf32>)
  // CHECK: ^bb0(%arg1: f32, %arg2: f32)
  // CHECK:   [[RES:%.+]] = addf %arg1, %arg2 : f32
  // CHECK:   linalg.yield [[RES]] : f32
  // CHECK: linalg.tensor_expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<5xf32> into tensor<5x1xf32>
  %1 = "tosa.reduce_sum"(%arg0) {axis = 1 : i64} : (tensor<5x4xf32>) -> tensor<5x1xf32>

  // CHECK: constant 1.0
  // CHECK: linalg.fill
  // CHECK: linalg.generic
  // CHECK: mulf
  %2 = "tosa.reduce_prod"(%arg0) {axis = 0 : i64} : (tensor<5x4xf32>) -> tensor<1x4xf32>

  // CHECK: constant 3.40282347E+38 : f32
  // CHECK: linalg.fill
  // CHECK: linalg.generic
  // CHECK: cmpf olt
  // CHECK: select
  %3 = "tosa.reduce_min"(%arg0) {axis = 0 : i64} : (tensor<5x4xf32>) -> tensor<1x4xf32>

  // CHECK: constant -3.40282347E+38 : f32
  // CHECK: linalg.fill
  // CHECK: linalg.generic
  // CHECK: cmpf ogt
  // CHECK: select
  %4 = "tosa.reduce_max"(%arg0) {axis = 0 : i64} : (tensor<5x4xf32>) -> tensor<1x4xf32>
  return
}
685
686// -----
687
// Integer reductions: same structure as @reduce_float with integer
// identities (0 for sum, 1 for prod, INT32_MAX/MIN for min/max).
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0)>

// CHECK-LABEL: @reduce_int
// CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xi32>
func @reduce_int(%arg0: tensor<5x4xi32>) -> () {
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [4]
  // CHECK: [[CST0:%.+]] = constant 0
  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<4xi32>)
  // CHECK: ^bb0(%arg1: i32, %arg2: i32)
  // CHECK:   [[RES:%.+]] = addi %arg1, %arg2 : i32
  // CHECK:   linalg.yield [[RES]] : i32
  // CHECK: linalg.tensor_expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<4xi32> into tensor<1x4xi32>
  %0 = "tosa.reduce_sum"(%arg0) {axis = 0 : i64} : (tensor<5x4xi32>) -> tensor<1x4xi32>

  // CHECK: [[INIT:%.+]] = linalg.init_tensor [5]
  // CHECK: [[CST0:%.+]] = constant 0
  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<5xi32>)
  // CHECK: ^bb0(%arg1: i32, %arg2: i32)
  // CHECK:   [[RES:%.+]] = addi %arg1, %arg2 : i32
  // CHECK:   linalg.yield [[RES]] : i32
  // CHECK: linalg.tensor_expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<5xi32> into tensor<5x1xi32>
  %1 = "tosa.reduce_sum"(%arg0) {axis = 1 : i64} : (tensor<5x4xi32>) -> tensor<5x1xi32>

  // CHECK: constant 1
  // CHECK: linalg.fill
  // CHECK: linalg.generic
  // CHECK: muli
  %2 = "tosa.reduce_prod"(%arg0) {axis = 0 : i64} : (tensor<5x4xi32>) -> tensor<1x4xi32>

  // CHECK: constant 2147483647 : i32
  // CHECK: linalg.fill
  // CHECK: linalg.generic
  // CHECK: cmpi slt
  // CHECK: select
  %3 = "tosa.reduce_min"(%arg0) {axis = 0 : i64} : (tensor<5x4xi32>) -> tensor<1x4xi32>

  // CHECK: constant -2147483648 : i32
  // CHECK: linalg.fill
  // CHECK: linalg.generic
  // CHECK: cmpi sgt
  // CHECK: select
  %4 = "tosa.reduce_max"(%arg0) {axis = 0 : i64} : (tensor<5x4xi32>) -> tensor<1x4xi32>
  return
}
736
737// -----
738
// Boolean reductions: reduce_all is an and-reduction seeded with true,
// reduce_any an or-reduction seeded with false.
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>

// CHECK-LABEL: @reduce_bool
// CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xi1>
func @reduce_bool(%arg0: tensor<5x4xi1>) -> () {
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [4]
  // CHECK: [[CST0:%.+]] = constant true
  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi1>) outs([[FILL]] : tensor<4xi1>)
  // CHECK: ^bb0(%arg1: i1, %arg2: i1)
  // CHECK:   [[RES:%.+]] = and %arg1, %arg2 : i1
  // CHECK:   linalg.yield [[RES]] : i1
  // CHECK: linalg.tensor_expand_shape [[GENERIC]] {{\[}}[0, 1]] : tensor<4xi1> into tensor<1x4xi1>
  %0 = "tosa.reduce_all"(%arg0) {axis = 0 : i64} : (tensor<5x4xi1>) -> tensor<1x4xi1>

  // CHECK: constant false
  // CHECK: linalg.fill
  // CHECK: linalg.generic
  // CHECK: or
  %1 = "tosa.reduce_any"(%arg0) {axis = 0 : i64} : (tensor<5x4xi1>) -> tensor<1x4xi1>

  return
}
763
764// -----
765
// tosa.concat lowering: allocate and zero-fill the result, then
// tensor.insert_slice each operand at a running offset along the concat
// axis; the offset advances by each operand's extent on that axis.
// CHECK-LABEL: @concat
func @concat(%arg0: tensor<5x1xf32>, %arg1: tensor<6x1xf32>) -> () {
  // Concat along axis 0: result extent 5 + 6 = 11.
  // CHECK: [[AXIS:%.+]] = constant 0
  // CHECK: [[STRIDE:%.+]]   = constant 1
  // CHECK: [[OFFSET:%.+]] = constant 0 : index
  // CHECK: [[IDX0:%.+]] = constant 0 : index
  // CHECK: [[ARG0_DIM0:%.+]] = tensor.dim %arg0, [[IDX0]]
  // CHECK: [[IDX1:%.+]] = constant 1 : index
  // CHECK: [[ARG0_DIM1:%.+]] = tensor.dim %arg0, [[IDX1]]
  // CHECK: [[ARG1_AXIS:%.+]] = tensor.dim %arg1, [[AXIS]]
  // CHECK: [[RESULT_AXIS:%.+]] = addi [[ARG0_DIM0]], [[ARG1_AXIS]]
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [11, 1]
  // CHECK: [[CST:%.+]] = constant 0.0
  // CHECK: [[FILL:%.+]] = linalg.fill([[CST]], [[INIT]])
  // CHECK: [[ARG0_DIM0:%.+]] = tensor.dim %arg0, [[AXIS]]
  // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
  // CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM0]]
  // CHECK: [[ARG1_DIM0:%.+]] = tensor.dim %arg1, [[AXIS]]
  // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %arg1 into [[INSERT0]]{{\[}}[[NEW_OFFSET]], [[OFFSET]]] {{\[}}[[ARG1_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
  %0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<5x1xf32>, tensor<6x1xf32>)  -> (tensor<11x1xf32>)

  // Concat %arg0 with itself along axis 1: result extent 1 + 1 = 2, and the
  // running offset moves in the second slice coordinate.
  // CHECK: [[AXIS:%.+]] = constant 1
  // CHECK: [[STRIDE:%.+]]   = constant 1
  // CHECK: [[OFFSET:%.+]] = constant 0 : index
  // CHECK: [[IDX0:%.+]] = constant 0 : index
  // CHECK: [[ARG0_DIM0:%.+]] = tensor.dim %arg0, [[IDX0]]
  // CHECK: [[IDX1:%.+]] = constant 1 : index
  // CHECK: [[ARG0_DIM1:%.+]] = tensor.dim %arg0, [[IDX1]]
  // CHECK: [[ARG1_AXIS:%.+]] = tensor.dim %arg0, [[AXIS]]
  // CHECK: [[RESULT_AXIS:%.+]] = addi [[ARG0_DIM1]], [[ARG1_AXIS]]
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [5, 2]
  // CHECK: [[CST:%.+]] = constant 0.0
  // CHECK: [[FILL:%.+]] = linalg.fill([[CST]], [[INIT]])
  // CHECK: [[ARG0_DIM1:%.+]] = tensor.dim %arg0, [[AXIS]]
  // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
  // CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM1]]
  // CHECK: [[ARG1_DIM1:%.+]] = tensor.dim %arg0, [[AXIS]]
  // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %arg0 into [[INSERT0]]{{\[}}[[OFFSET]], [[NEW_OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG1_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
  %1 = "tosa.concat"(%arg0, %arg0) { axis = 1 : i64} : (tensor<5x1xf32>, tensor<5x1xf32>)  -> (tensor<5x2xf32>)
  return
}
807
808// -----
809
810// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
811
812// CHECK-LABEL: @rescale_i8
// Lowering of tosa.rescale (scale32 = false) on signed i8: sign-extend to
// i32, subtract input_zp, tosa.apply_scale, add output_zp, clamp to the
// signed i8 range [-128, 127], then truncate back to i8.
func @rescale_i8(%arg0 : tensor<2xi8>) -> () {
  // CHECK: [[C0:%.+]] = constant 19689
  // CHECK: [[C1:%.+]] = constant 15
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2]
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
  // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
  // CHECK: [[C17:%.+]] = constant 17
  // CHECK: [[C22:%.+]] = constant 22
  // CHECK-DAG: [[IN32:%.+]] = sexti [[IN]]
  // CHECK-DAG: [[IN_ZEROED:%.+]] = subi [[IN32]], [[C17]]
  // CHECK-DAG: [[SCALED:%.+]] = "tosa.apply_scale"([[IN_ZEROED]], [[C0]], [[C1]]) {double_round = false}
  // CHECK-DAG: [[SCALED_ZEROED:%.+]] = addi [[SCALED]], [[C22]]
  // CHECK-DAG: [[CMIN:%.+]] = constant -128
  // CHECK-DAG: [[CMAX:%.+]] = constant 127
  // CHECK-DAG: [[MINLT:%.+]] = cmpi slt, [[SCALED_ZEROED]], [[CMIN]]
  // CHECK-DAG: [[MAXLT:%.+]] = cmpi slt, [[CMAX]], [[SCALED_ZEROED]]
  // CHECK-DAG: [[LOWER:%.+]] = select [[MINLT]], [[CMIN]], [[SCALED_ZEROED]]
  // CHECK-DAG: [[BOUNDED:%.+]] = select [[MAXLT]], [[CMAX]], [[LOWER]]
  // CHECK-DAG: [[TRUNC:%.+]] = trunci [[BOUNDED]]
  // CHECK-DAG: linalg.yield [[TRUNC]]
  %0 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor<2xi8>)  -> (tensor<2xi8>)

  // Same rescale with an unsigned ui8 result type: the clamp range becomes
  // [0, 255] and the truncated i8 is reinterpreted as ui8 through a
  // builtin.unrealized_conversion_cast before being yielded.
  // CHECK: [[C0:%.+]] = constant 19689
  // CHECK: [[C1:%.+]] = constant 15
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2]
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xi8>) outs([[INIT]] : tensor<2xui8>)
  // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: ui8):
  // CHECK: [[C17:%.+]] = constant 17
  // CHECK: [[C22:%.+]] = constant 22
  // CHECK-DAG: [[IN32:%.+]] = sexti [[IN]]
  // CHECK-DAG: [[IN_ZEROED:%.+]] = subi [[IN32]], [[C17]]
  // CHECK-DAG: [[SCALED:%.+]] = "tosa.apply_scale"([[IN_ZEROED]], [[C0]], [[C1]]) {double_round = false}
  // CHECK-DAG: [[SCALED_ZEROED:%.+]] = addi [[SCALED]], [[C22]]
  // CHECK-DAG: [[CMIN:%.+]] = constant 0
  // CHECK-DAG: [[CMAX:%.+]] = constant 255
  // CHECK-DAG: [[MINLT:%.+]] = cmpi slt, [[SCALED_ZEROED]], [[CMIN]]
  // CHECK-DAG: [[LOWER:%.+]] = select [[MINLT]], [[CMIN]], [[SCALED_ZEROED]]
  // CHECK-DAG: [[MAXLT:%.+]] = cmpi slt, [[CMAX]], [[SCALED_ZEROED]]
  // CHECK-DAG: [[BOUNDED:%.+]] = select [[MAXLT]], [[CMAX]], [[LOWER]]
  // CHECK-DAG: [[TRUNC:%.+]] = trunci [[BOUNDED]]
  // CHECK-DAG: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[TRUNC]] : i8 to ui8
  // CHECK: linalg.yield [[CAST]]
  %1 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor<2xi8>)  -> (tensor<2xui8>)

  // CHECK: return
  return
}
860
861// -----
862
863// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
864
865// CHECK-LABEL: @rescale_ui8
// Lowering of tosa.rescale with an unsigned ui8 input: the ui8 operand is
// first reinterpreted as i8 via unrealized_conversion_cast and then
// zero-extended (zexti) instead of sign-extended; output clamps to signed i8.
func @rescale_ui8(%arg0 : tensor<2xui8>) -> () {
  // CHECK: [[C0:%.+]] = constant 19689
  // CHECK: [[C1:%.+]] = constant 15
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2]
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0 : tensor<2xui8>) outs([[INIT]] : tensor<2xi8>)
  // CHECK: ^bb0([[IN:%.+]]: ui8, [[UNUSED:%.+]]: i8):
  // CHECK: [[C17:%.+]] = constant 17
  // CHECK: [[C22:%.+]] = constant 22
  // CHECK-DAG: [[CAST:%.+]] = builtin.unrealized_conversion_cast [[IN]] : ui8 to i8
  // CHECK-DAG: [[IN32:%.+]] = zexti [[CAST]]
  // CHECK-DAG: [[IN_ZEROED:%.+]] = subi [[IN32]], [[C17]]
  // CHECK-DAG: [[SCALED:%.+]] = "tosa.apply_scale"([[IN_ZEROED]], [[C0]], [[C1]]) {double_round = false}
  // CHECK-DAG: [[SCALED_ZEROED:%.+]] = addi [[SCALED]], [[C22]]
  // CHECK-DAG: [[CMIN:%.+]] = constant -128
  // CHECK-DAG: [[CMAX:%.+]] = constant 127
  // CHECK-DAG: [[MINLT:%.+]] = cmpi slt, [[SCALED_ZEROED]], [[CMIN]]
  // CHECK-DAG: [[LOWER:%.+]] = select [[MINLT]], [[CMIN]], [[SCALED_ZEROED]]
  // CHECK-DAG: [[MAXLT:%.+]] = cmpi slt, [[CMAX]], [[SCALED_ZEROED]]
  // CHECK-DAG: [[BOUNDED:%.+]] = select [[MAXLT]], [[CMAX]], [[LOWER]]
  // CHECK-DAG: [[TRUNC:%.+]] = trunci [[BOUNDED]]
  // CHECK: linalg.yield [[TRUNC]]
  %0 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor<2xui8>)  -> (tensor<2xi8>)

  return
}
891
892// -----
893
894// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
895
896// CHECK-LABEL: @rescale_per_channel
// Per-channel tosa.rescale lowering: the multiplier and shift attribute
// arrays are materialized as constant tensors and fed to the linalg.generic
// as extra inputs, so each element is scaled by its own multiplier/shift.
func @rescale_per_channel(%arg0 : tensor<2xi8>) -> (tensor<2xi8>) {
  // CHECK: [[MULTIPLIERS:%.+]] = constant dense<[42, 43]>
  // CHECK: [[SHIFTS:%.+]] = constant dense<[14, 15]>
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2]
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%arg0, [[MULTIPLIERS]], [[SHIFTS]] : tensor<2xi8>, tensor<2xi32>, tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
  // CHECK: ^bb0([[IN:%.+]]: i8, [[MULTIPLIER:%.+]]: i32, [[SHIFT:%.+]]: i8, [[UNUSED:%.+]]: i8):
  // CHECK: [[C243:%.+]] = constant 243
  // CHECK: [[C252:%.+]] = constant 252

  // CHECK-DAG: [[IN32:%.+]] = sexti [[IN]]
  // CHECK-DAG: [[IN_ZEROED:%.+]] = subi [[IN32]], [[C243]]
  // CHECK-DAG: [[SCALED:%.+]] = "tosa.apply_scale"([[IN_ZEROED]], [[MULTIPLIER]], [[SHIFT]]) {double_round = false}
  // CHECK-DAG: [[SCALED_ZEROED:%.+]] = addi [[SCALED]], [[C252]]
  // CHECK-DAG: [[CMIN:%.+]] = constant -128
  // CHECK-DAG: [[CMAX:%.+]] = constant 127
  // CHECK-DAG: [[MINLT:%.+]] = cmpi slt, [[SCALED_ZEROED]], [[CMIN]]
  // CHECK-DAG: [[MAXLT:%.+]] = cmpi slt, [[CMAX]], [[SCALED_ZEROED]]
  // CHECK-DAG: [[LOWER:%.+]] = select [[MINLT]], [[CMIN]], [[SCALED_ZEROED]]
  // CHECK-DAG: [[BOUNDED:%.+]] = select [[MAXLT]], [[CMAX]], [[LOWER]]
  // CHECK-DAG: [[TRUNC:%.+]] = trunci [[BOUNDED]]
  // CHECK-DAG: linalg.yield [[TRUNC]]
  // NOTE(review): per_channel must be true here — the multiplier/shift
  // arrays above are per-channel pairs; it previously said false.
  %0 = "tosa.rescale"(%arg0) {input_zp = 243 : i32, output_zp = 252 : i32, multiplier = [42 : i32, 43 : i32], shift = [14 : i32, 15 : i32], scale32 = false, double_round = false, per_channel = true} : (tensor<2xi8>)  -> (tensor<2xi8>)

  // CHECK: return [[GENERIC]]
  return %0 : tensor<2xi8>
}
923
924// -----
925
926// CHECK-LABEL: @rescaleDoubleRound
// With scale32 = true and a shift value above 31, double_round = true must be
// propagated onto the generated tosa.apply_scale.
func @rescaleDoubleRound(%arg0 : tensor<2xi8>) -> (tensor<2xi8>) {
  // CHECK: linalg.generic
  // CHECK: "tosa.apply_scale"
  // CHECK-SAME:  {double_round = true}
  %0 = "tosa.rescale"(%arg0) {input_zp = 243 : i32, output_zp = 252 : i32, multiplier = [19689 : i32], shift = [33 : i32], scale32 = true, double_round = true, per_channel = false} : (tensor<2xi8>)  -> (tensor<2xi8>)
  return %0 : tensor<2xi8>
}
934
935// CHECK-LABEL: @rescaleUnnecessaryDoubleRound
// Counterpart to @rescaleDoubleRound: with a small shift (15), the lowering
// drops double rounding and emits double_round = false even though the op
// requested double_round = true.
func @rescaleUnnecessaryDoubleRound(%arg0 : tensor<2xi8>) -> (tensor<2xi8>) {
  // CHECK: linalg.generic
  // CHECK: "tosa.apply_scale"
  // CHECK-SAME:  {double_round = false}
  %0 = "tosa.rescale"(%arg0) {input_zp = 243 : i32, output_zp = 252 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = true, double_round = true, per_channel = false} : (tensor<2xi8>)  -> (tensor<2xi8>)
  return %0 : tensor<2xi8>
}
943
944// -----
945
946// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
947
948// CHECK-LABEL: @reverse
// tosa.reverse lowers to a linalg.generic over the output tensor that
// tensor.extract's from the input at index (dim_size - 1 - i) along the
// reversed axis; both axis = 0 and axis = 1 are exercised.
func @reverse(%arg0: tensor<5x4xi32>) -> () {
  // CHECK: %[[C0:.+]] = constant 0
  // CHECK: %[[RDIM:.+]] = tensor.dim %arg0, %[[C0]]
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [5, 4]
  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>)
  // CHECK-DAG:   %[[I0:.+]] = linalg.index 0
  // CHECK-DAG:   %[[I1:.+]] = linalg.index 1
  // CHECK-DAG:   %[[SUB1:.+]] = constant 1
  // CHECK-DAG:   %[[RDIM_MINUS_C1:.+]] = subi %[[RDIM]], %[[SUB1]]
  // CHECK-DAG:   %[[READ_DIM:.+]] = subi %[[RDIM_MINUS_C1]], %[[I0]]
  // CHECK-DAG:   %[[EXTRACT:.+]] = tensor.extract %arg0[%[[READ_DIM]], %[[I1]]] : tensor<5x4xi32>
  // CHECK:   linalg.yield %[[EXTRACT]]
  %0 = "tosa.reverse"(%arg0) {axis = 0 : i64} : (tensor<5x4xi32>) -> tensor<5x4xi32>

  // Reversal along axis 1: only the second extraction index is flipped.
  // CHECK: %[[C1:.+]] = constant 1
  // CHECK: %[[RDIM:.+]] = tensor.dim %arg0, %[[C1]]
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [5, 4]
  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>)
  // CHECK-DAG:   %[[I0:.+]] = linalg.index 0
  // CHECK-DAG:   %[[I1:.+]] = linalg.index 1
  // CHECK-DAG:   %[[SUB1:.+]] = constant 1
  // CHECK-DAG:   %[[RDIM_MINUS_C1:.+]] = subi %[[RDIM]], %[[SUB1]]
  // CHECK-DAG:   %[[READ_DIM:.+]] = subi %[[RDIM_MINUS_C1]], %[[I1]]
  // CHECK-DAG:   %[[EXTRACT:.+]] = tensor.extract %arg0[%[[I0]], %[[READ_DIM]]] : tensor<5x4xi32>
  // CHECK:   linalg.yield %[[EXTRACT]]
  %1 = "tosa.reverse"(%arg0) {axis = 1 : i64} : (tensor<5x4xi32>) -> tensor<5x4xi32>
  return
}
977
978// -----
979
980// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
981
982// CHECK-LABEL: @reverse_dyn
// Same reverse lowering with a dynamic dimension: the dim is queried twice,
// once to size the init_tensor and once to compute the flipped read index.
func @reverse_dyn(%arg0: tensor<?xi32>) -> () {
  // CHECK: %[[C0_1:.+]] = constant 0
  // CHECK: %[[D0_1:.+]] = tensor.dim %arg0, %[[C0_1]]
  // CHECK: %[[C0_2:.+]] = constant 0
  // CHECK: %[[D0_2:.+]] = tensor.dim %arg0, %[[C0_2]]
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[D0_1]]]
  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} outs(%[[INIT]] : tensor<?xi32>)
  // CHECK-DAG:   %[[I0:.+]] = linalg.index 0
  // CHECK-DAG:   %[[SUB1:.+]] = constant 1
  // CHECK-DAG:   %[[RDIM_MINUS_C1:.+]] = subi %[[D0_2]], %[[SUB1]]
  // CHECK-DAG:   %[[READ_DIM:.+]] = subi %[[RDIM_MINUS_C1]], %[[I0]]
  // CHECK-DAG:   %[[EXTRACT:.+]] = tensor.extract %arg0[%[[READ_DIM]]] : tensor<?xi32>
  // CHECK:   linalg.yield %[[EXTRACT]]
  %0 = "tosa.reverse"(%arg0) {axis = 0 : i64} : (tensor<?xi32>) -> tensor<?xi32>
  return
}
999
1000// -----
1001
1002// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)>
1003// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
1004
1005// CHECK-LABEL: @tile
// tosa.tile lowers to a rank-4 broadcast linalg.generic (multiple and dim
// interleaved as [m0, d0, m1, d1]) followed by a tensor_collapse_shape back
// to the tiled 2-D result.
func @tile(%arg0 : tensor<2x3xi8>) -> () {
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [2, 2, 1, 3]
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs([[INIT]] : tensor<2x2x1x3xi8>)
  // CHECK:   linalg.yield %arg1 : i8
  // CHECK: linalg.tensor_collapse_shape [[GENERIC]] {{\[}}[0, 1, 2], [3]]
  %0 = "tosa.tile"(%arg0) {multiples = [2, 1]} : (tensor<2x3xi8>)  -> (tensor<4x3xi8>)

  // Tiling only the second dimension.
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 2, 2, 3]
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs([[INIT]] : tensor<1x2x2x3xi8>)
  // CHECK:   linalg.yield %arg1 : i8
  // CHECK: linalg.tensor_collapse_shape [[GENERIC]] {{\[}}[0, 1], [2, 3]]
  %1 = "tosa.tile"(%arg0) {multiples = [1, 2]} : (tensor<2x3xi8>)  -> (tensor<2x6xi8>)

  // Tiling both dimensions at once.
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [5, 2, 7, 3]
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<2x3xi8>) outs([[INIT]] : tensor<5x2x7x3xi8>)
  // CHECK:   linalg.yield %arg1 : i8
  // CHECK: linalg.tensor_collapse_shape [[GENERIC]] {{\[}}[0, 1], [2, 3]]
  %2 = "tosa.tile"(%arg0) {multiples = [5, 7]} : (tensor<2x3xi8>)  -> (tensor<10x21xi8>)

  return
}
1027
1028// -----
1029
1030
1031// CHECK-LABEL: @matmul
// tosa.matmul on floats lowers to a zero-filled init_tensor consumed by
// linalg.batch_matmul.
func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) {
  // CHECK: [[C0:%.+]] = constant 0
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6]
  // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32>
  // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
  %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>)  -> (tensor<1x5x6xf32>)
  return %0 : tensor<1x5x6xf32>
}
1040
1041// -----
1042
1043
1044// CHECK-LABEL: @matmul_quantized
// Quantized tosa.matmul: the a_zp/b_zp values from quantization_info become
// scalar constants passed to linalg.quantized_batch_matmul.
func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) {
  // CHECK: [[C0:%.+]] = constant 0
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6]
  // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : i32, tensor<1x5x6xi32> -> tensor<1x5x6xi32>
  // CHECK: [[ONE:%.+]] = constant 1
  // CHECK: [[TWO:%.+]] = constant 2
  // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32>
  %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = {a_zp = 1 : i32, b_zp = 2 : i32}} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>)
  return %0 : tensor<1x5x6xi32>
}
1055
1056// -----
1057
1058// CHECK-LABEL: @matmul_dyn_batch
// Dynamic batch dimension: the batch size is read with tensor.dim and used
// to size the init_tensor before the batch_matmul.
func @matmul_dyn_batch(%arg0: tensor<?x5x3xf32>, %arg1: tensor<?x3x6xf32>) -> (tensor<?x5x6xf32>) {
  // CHECK: %[[C0:.+]] = constant 0
  // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
  // CHECK: %[[C0_0:.+]] = constant 0
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 5, 6]
  // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0_0]], %[[INIT]]) : f32, tensor<?x5x6xf32> -> tensor<?x5x6xf32>
  // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x5x3xf32>, tensor<?x3x6xf32>) outs(%[[FILLED]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32>
  %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<?x5x3xf32>, tensor<?x3x6xf32>)  -> (tensor<?x5x6xf32>)
  return %0 : tensor<?x5x6xf32>
}
1069
1070// -----
1071
1072// CHECK-LABEL: @matmul_dyn_independent_dim
// Dynamic N dimension (columns of the second operand): its runtime size is
// queried and threaded into the init_tensor shape.
func @matmul_dyn_independent_dim(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) {
  // CHECK: %[[C2:.+]] = constant 2
  // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]]
  // CHECK: %[[C0:.+]] = constant 0
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, %[[DIM]]]
  // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x?xf32> -> tensor<1x5x?xf32>
  // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32>
  %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>)  -> (tensor<1x5x?xf32>)
  return %0 : tensor<1x5x?xf32>
}
1083
1084// -----
1085
1086// CHECK-LABEL: @matmul_dyn_independent_dim
// Dynamic contraction (K) dimension only: since K does not appear in the
// result shape, no tensor.dim query is needed and the init is fully static.
// (Reuses the previous test name; this is a separate --split-input-file part.)
func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) {
  // CHECK: %[[C0:.+]] = constant 0
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, 6]
  // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32>
  // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
  %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>)  -> (tensor<1x5x6xf32>)
  return %0 : tensor<1x5x6xf32>
}
1095
1096// -----
1097
1098// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1, d0)>
1099// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)>
1100// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d1)>
1101
1102// CHECK-LABEL: @fully_connected
// tosa.fully_connected lowers to: transpose of the weights (via a
// linalg.generic with permuted indexing maps), a zero-filled linalg.matmul,
// and a final generic that broadcasts and adds the bias row.
func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) {
  // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6]
  // CHECK: [[ZERO:%.+]] = constant 0
  // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]])
  // CHECK: [[PERM:%.+]] = constant dense<[1, 0]>
  // CHECK: [[INITT:%.+]] = linalg.init_tensor [3, 6]
  // CHECK: [[TRANSPOSE:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xf32>) outs([[INITT]] : tensor<3x6xf32>) {
  // CHECK: ^bb0([[IN:%.+]]: f32, [[UNUSED:%.+]]: f32):
  // CHECK:   linalg.yield [[IN]] : f32
  // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6]
  // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32>
  // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) {
  // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
  // CHECK:   [[ADD:%.+]] = addf %arg3, %arg4 : f32
  // CHECK:   linalg.yield [[ADD]] : f32

  %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<5x3xf32>, tensor<6x3xf32>, tensor<6xf32>)  -> (tensor<5x6xf32>)
  return %0 : tensor<5x6xf32>
}
1122
1123// -----
1124
1125// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1, d0)>
1126// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)>
1127// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d1)>
1128
1129// CHECK-LABEL: @quantized_fully_connected
// Quantized fully_connected: same transpose + matmul + bias-add structure as
// the float case, but with linalg.quantized_matmul taking the input_zp and
// weight_zp scalars, and an integer bias add.
func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) {
  // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6]
  // CHECK: [[ZERO:%.+]] = constant 0
  // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]])
  // CHECK: [[PERM:%.+]] = constant dense<[1, 0]>
  // CHECK: [[INITT:%.+]] = linalg.init_tensor [3, 6]
  // CHECK: [[TRANSPOSE:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xi8>) outs([[INITT]] : tensor<3x6xi8>) {
  // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
  // CHECK:   linalg.yield [[IN]] : i8
  // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6]
  // CHECK: [[ONE:%.+]] = constant 1
  // CHECK: [[TWO:%.+]] = constant 2
  // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32>
  // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]]
  // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32):
  // CHECK:   [[ADD:%.+]] = addi
  // CHECK:   linalg.yield [[ADD]] : i32
  %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) {quantization_info = {input_zp = 1:i32, weight_zp = 2:i32}} : (tensor<5x3xi8>, tensor<6x3xi8>, tensor<6xi32>)  -> (tensor<5x6xi32>)
  return %0 : tensor<5x6xi32>
}
1150
1151// -----
1152
1153// CHECK-LABEL: @fully_connected_dyn
// fully_connected with a dynamic batch dimension: the batch size is queried
// once with tensor.dim and reused for both init_tensors. The #[[$MAP*]]
// variables are FileCheck globals captured in an earlier split section.
func @fully_connected_dyn(%arg0: tensor<?x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<?x6xf32>) {
  // CHECK: %[[C0:.+]] = constant 0
  // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
  // CHECK: %[[INITT:.+]] = linalg.init_tensor [%[[DIM]], 6]
  // CHECK: %[[ZERO:.+]] = constant 0
  // CHECK: %[[FILL:.+]] = linalg.fill(%[[ZERO]], %[[INITT]])
  // CHECK: %[[PERM:.+]] = constant dense<[1, 0]>
  // CHECK: %[[INITT:.+]] = linalg.init_tensor [3, 6]
  // CHECK: %[[TRANSPOSE:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xf32>) outs(%[[INITT]] : tensor<3x6xf32>) {
  // CHECK: ^bb0(%[[IN:.+]]: f32, %[[UNUSED:.+]]: f32):
  // CHECK:   linalg.yield %[[IN]] : f32
  // CHECK: %[[INITB:.+]] = linalg.init_tensor [%[[DIM]], 6]
  // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor<?x3xf32>, tensor<3x6xf32>) outs(%[[FILL]] : tensor<?x6xf32>) -> tensor<?x6xf32>
  // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor<?x6xf32>) outs(%[[INITB]] : tensor<?x6xf32>) {
  // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
  // CHECK:   %[[ADD:.+]] = addf %arg3, %arg4 : f32
  // CHECK:   linalg.yield %[[ADD]] : f32

  %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<?x3xf32>, tensor<6x3xf32>, tensor<6xf32>)  -> (tensor<?x6xf32>)
  return %0 : tensor<?x6xf32>
}
1175
1176// -----
1177
// tosa.pad on floats lowers to linalg.pad_tensor with index constants taken
// from the padding tensor and a 0.0 fill value yielded in the pad region.
// Added CHECK-LABEL to anchor this test's checks, consistent with the rest
// of the file.
// CHECK-LABEL: @pad_float
func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {
  %0 = constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32>
  // TODO: Output contains multiple "constant 1 : index".
  // CHECK: [[INDEX1:%.+]] = constant 1 : index
  // CHECK: [[INDEX2:%.+]] = constant 2 : index
  // CHECK: [[INDEX3:%.+]] = constant 3 : index
  // CHECK: [[INDEX4:%.+]] = constant 4 : index
  // CHECK: [[CST:%.+]] = constant 0.000000e+00 : f32
  // CHECK: linalg.pad_tensor %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]]  {
  // CHECK: ^bb0(%arg1: index, %arg2: index):  // no predecessors
  // CHECK:   linalg.yield [[CST]]
  // CHECK: } : tensor<1x2xf32> to tensor<4x9xf32>
  %1 = "tosa.pad"(%arg0, %0)  : (tensor<1x2xf32>, tensor<2x2xi32>)  -> (tensor<4x9xf32>)
  return %1 : tensor<4x9xf32>
}
1193
// Integer tosa.pad: the pad region yields the integer zero constant.
// Added CHECK-LABEL to anchor this test's checks, consistent with the rest
// of the file.
// CHECK-LABEL: @pad_int
func @pad_int(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) {
  %0 = constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32>
  // CHECK: [[CST:%.+]] = constant 0 : i32
  // CHECK: linalg.pad_tensor
  // CHECK:   linalg.yield [[CST]]
  %1 = "tosa.pad"(%arg0, %0)  : (tensor<1x2xi32>, tensor<2x2xi32>)  -> (tensor<4x9xi32>)
  return %1 : tensor<4x9xi32>
}
1202
// Quantized tosa.pad: the pad region yields the input_zp value (42) from
// quantization_info instead of zero.
// Added CHECK-LABEL to anchor this test's checks, consistent with the rest
// of the file.
// CHECK-LABEL: @pad_quant
func @pad_quant(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) {
  %0 = constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32>
  // CHECK: [[CST:%.+]] = constant 42 : i32
  // CHECK: linalg.pad_tensor
  // CHECK:   linalg.yield [[CST]]
  %1 = "tosa.pad"(%arg0, %0) { quantization_info = { input_zp = 42 : i32}} : (tensor<1x2xi32>, tensor<2x2xi32>)  -> (tensor<4x9xi32>)
  return %1 : tensor<4x9xi32>
}
1211
1212// -----
1213
1214// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
1215// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
1216// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0)>
1217// CHECK: #[[$MAP3:.*]] = affine_map<(d0) -> (d0)>
1218// CHECK: #[[$MAP4:.*]] = affine_map<(d0) -> ()>
1219
// tosa.argmax lowers to a two-result linalg.generic that reduces along the
// requested axis, carrying both the running index and the running max value
// (initialized to 0 and the type's minimum, respectively).
// Added CHECK-LABEL to anchor this test's checks; the second generic now
// matches via the global #[[$MAP0]]/#[[$MAP2]] captures instead of the
// fragile hard-coded #map0/#map2 names.
// CHECK-LABEL: @argmax
func @argmax(%arg0 : tensor<3x2xi32>, %arg1 : tensor<6xf32>) -> () {
  // CHECK: [[IDX_INIT:%.+]] = linalg.init_tensor [2]
  // CHECK: [[IDX_MIN:%.+]] = constant 0 : i32
  // CHECK: [[IDX_FILL:%.+]] = linalg.fill([[IDX_MIN]], [[IDX_INIT]])
  // CHECK: [[VAL_INIT:%.+]] = linalg.init_tensor [2]
  // CHECK: [[VAL_MIN:%.+]] = constant -2147483648
  // CHECK: [[VAL_FILL:%.+]] = linalg.fill([[VAL_MIN]], [[VAL_INIT]])
  // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%arg0 : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>)
  // CHECK:   [[IDX:%.+]] = linalg.index 0
  // CHECK:   [[CAST:%.+]] = index_cast [[IDX]]
  // CHECK:   [[CMP:%.+]] = cmpi sgt, %arg2, %arg4
  // CHECK:   [[SELECT_VAL:%.+]] = select [[CMP]], %arg2, %arg4
  // CHECK:   [[SELECT_IDX:%.+]] = select [[CMP]], [[CAST]], %arg3
  // CHECK:   linalg.yield [[SELECT_IDX]], [[SELECT_VAL]]
  %0 = "tosa.argmax"(%arg0) { axis = 0 : i64} : (tensor<3x2xi32>)  -> (tensor<2xi32>)

  // Reduction along axis 1.
  // CHECK: [[IDX_INIT:%.+]] = linalg.init_tensor [3]
  // CHECK: [[IDX_MIN:%.+]] = constant 0 : i32
  // CHECK: [[IDX_FILL:%.+]] = linalg.fill([[IDX_MIN]], [[IDX_INIT]])
  // CHECK: [[VAL_INIT:%.+]] = linalg.init_tensor [3]
  // CHECK: [[VAL_MIN:%.+]] = constant -2147483648
  // CHECK: [[VAL_FILL:%.+]] = linalg.fill([[VAL_MIN]], [[VAL_INIT]])
  // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
  // CHECK:   [[IDX:%.+]] = linalg.index 1
  // CHECK:   [[CAST:%.+]] = index_cast [[IDX]]
  // CHECK:   [[CMP:%.+]] = cmpi sgt, %arg2, %arg4
  // CHECK:   [[SELECT_VAL:%.+]] = select [[CMP]], %arg2, %arg4
  // CHECK:   [[SELECT_IDX:%.+]] = select [[CMP]], [[CAST]], %arg3
  // CHECK:   linalg.yield [[SELECT_IDX]], [[SELECT_VAL]]
  %1 = "tosa.argmax"(%arg0) { axis = 1 : i64} : (tensor<3x2xi32>)  -> (tensor<3xi32>)

  // Float input: comparison becomes cmpf ogt and the init value the f32 min.
  // CHECK: constant -3.40282347E+38 : f32
  // CHECK: linalg.index
  // CHECK: index_cast
  // CHECK: cmpf ogt
  // CHECK: select
  // CHECK: select
  // CHECK: linalg.yield
  %2 = "tosa.argmax"(%arg1) { axis = 0 : i64} : (tensor<6xf32>)  -> (tensor<i32>)

  return
}
1262
1263// -----
1264
1265// CHECK-LABEL: @gather_float
// tosa.gather lowers to a linalg.generic over the indices tensor whose body
// extracts values[batch, index, channel] from the float values operand.
func @gather_float(%arg0: tensor<2x3x2xf32>, %arg1: tensor<2x3xi32>) -> () {
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, 3, 2]
  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1 : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xf32>)
  // CHECK: ^bb0(%[[ARG0:.+]]: i32, %[[ARG1:.+]]: f32)
  // CHECK:   %[[IDX0:.+]] = linalg.index 0
  // CHECK:   %[[CAST:.+]] = index_cast %[[ARG0]]
  // CHECK:   %[[IDX2:.+]] = linalg.index 2
  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %arg0[%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<2x3x2xf32>
  // CHECK:   linalg.yield %[[EXTRACT]]
  %0 = "tosa.gather"(%arg0, %arg1)  : (tensor<2x3x2xf32>, tensor<2x3xi32>)  -> (tensor<2x3x2xf32>)
  return
}
1278
1279// CHECK-LABEL: @gather_int
// Integer variant of @gather_float: identical structure, i32 element type.
func @gather_int(%arg0: tensor<2x3x2xi32>, %arg1: tensor<2x3xi32>) -> () {
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [2, 3, 2]
  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1 : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xi32>)
  // CHECK: ^bb0(%[[ARG0:.+]]: i32, %[[ARG1:.+]]: i32)
  // CHECK:   %[[IDX0:.+]] = linalg.index 0
  // CHECK:   %[[CAST:.+]] = index_cast %[[ARG0]]
  // CHECK:   %[[IDX2:.+]] = linalg.index 2
  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %arg0[%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<2x3x2xi32>
  // CHECK:   linalg.yield %[[EXTRACT]]
  %0 = "tosa.gather"(%arg0, %arg1)  : (tensor<2x3x2xi32>, tensor<2x3xi32>)  -> (tensor<2x3x2xi32>)
  return
}
1292
1293// -----
1294
1295// CHECK-LABEL: @table8
// tosa.table with an i8 input: the signed input is offset by 128 to index
// directly into the lookup-table tensor (no interpolation for 8-bit tables).
func @table8(%arg0: tensor<6xi8>, %arg1: tensor<512xi8>) -> () {
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [6]
  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>)
  // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
  // CHECK:   %[[CAST:.+]] = index_cast %[[ARG_IN]]
  // CHECK:   %[[OFFSET:.+]] = constant 128
  // CHECK:   %[[ADD:.+]] = addi %[[CAST]], %[[OFFSET]]
  // CHECK:   %[[EXTRACT:.+]] = tensor.extract %arg1[%[[ADD]]]
  // CHECK:   linalg.yield %[[EXTRACT]]
  %0 = "tosa.table"(%arg0, %arg1)  : (tensor<6xi8>, tensor<512xi8>)  -> (tensor<6xi8>)
  return
}
1308
1309// -----
1310
1311// CHECK-LABEL: @table16
// tosa.table with an i16 input: the input is split into a 9-bit table index
// (top bits after offsetting by 32768) and a 7-bit fraction; the result
// linearly interpolates between table[idx] and table[idx + 1].
func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () {
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [6]
  // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg0 : tensor<6xi16>) outs(%[[INIT]] : tensor<6xi32>)
  // CHECK: ^bb0(%arg2: i16, %arg3: i32)
  // CHECK: %[[EXT_IN:.+]] = sexti %arg2
  // CHECK: %[[C32768:.+]] = constant 32768
  // CHECK: %[[C7:.+]] = constant 7
  // CHECK: %[[C1:.+]] = constant 1
  // CHECK: %[[C127:.+]] = constant 127
  // CHECK: %[[INADD:.+]] = addi %[[EXT_IN]], %[[C32768]]
  // CHECK: %[[IDX:.+]] = shift_right_unsigned %[[INADD]], %[[C7]]
  // CHECK: %[[FRACTION:.+]] = and %[[INADD]], %[[C127]]
  // CHECK: %[[IDXPLUS1:.+]] = addi %[[IDX]], %[[C1]]
  // CHECK: %[[IDX_CAST:.+]] = index_cast %[[IDX]]
  // CHECK: %[[IDXPLUS1_CAST:.+]] = index_cast %[[IDXPLUS1]]
  // CHECK: %[[BASE:.+]] = tensor.extract %arg1[%[[IDX_CAST]]]
  // CHECK: %[[NEXT:.+]] = tensor.extract %arg1[%[[IDXPLUS1_CAST]]]
  // CHECK: %[[BASE_EXT:.+]] = sexti %[[BASE]]
  // CHECK: %[[NEXT_EXT:.+]] = sexti %[[NEXT]]
  // CHECK: %[[BASE_MUL:.+]] = shift_left %[[BASE_EXT]], %[[C7]]
  // CHECK: %[[DIFF:.+]] = subi %[[NEXT_EXT]], %[[BASE_EXT]]
  // CHECK: %[[DIFF_MUL:.+]] = muli %[[DIFF]], %[[FRACTION]]
  // CHECK: %[[RESULT:.+]] = addi %[[BASE_MUL]], %[[DIFF_MUL]]
  // CHECK: linalg.yield %[[RESULT]]
  %0 = "tosa.table"(%arg0, %arg1)  : (tensor<6xi16>, tensor<513xi16>)  -> (tensor<6xi32>)
  return
}
1339
1340// -----
1341
1342// CHECK-LABEL: @max_pool
// tosa.max_pool2d lowers to linalg.pooling_nhwc_max with the accumulator
// pre-filled with the float minimum and a 3x3 init_tensor as the window.
func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () {
  // CHECK-DAG: [[CONST:%.+]] = constant -3.40282347E+38
  // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 32, 62]
  // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[CONST]], [[INIT]])
  // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3]
  // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>)
  %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>)  -> (tensor<1x4x32x62xf32>)
  return
}
1352
1353// CHECK-LABEL: @max_pool_padded
// Padded max_pool2d: non-zero pad attribute produces a linalg.pad_tensor
// whose pad region yields the float minimum so padding never wins the max.
func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () {
  // CHECK-DAG: [[CONST:%.+]] = constant -3.40282347E+38 : f32
  // CHECK-DAG: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0]
  // CHECK-DAG:   linalg.yield [[CONST]]
  // CHECK-DAG: [[INITVAL:%.+]] = constant -3.40282347E+38 : f32
  // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62]
  // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]])
  // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3]
  // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>)
  %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>)  -> (tensor<1x4x33x62xf32>)
  return
}
1366
// CHECK-LABEL: @max_pool_i8
// i8 max pool: only checks that the fill identity is -128 (i8 minimum).
func @max_pool_i8(%arg0: tensor<1x6x34x62xi8>) -> () {
  // CHECK: constant -128
  // CHECK: linalg.pooling_nhwc_max
  %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi8>)  -> (tensor<1x4x32x62xi8>)
  return
}
1374
// CHECK-LABEL: @max_pool_i16
// i16 max pool: only checks that the fill identity is -32768 (i16 minimum).
func @max_pool_i16(%arg0: tensor<1x6x34x62xi16>) -> () {
  // CHECK: constant -32768
  // CHECK: linalg.pooling_nhwc_max
  %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi16>)  -> (tensor<1x4x32x62xi16>)
  return
}
1382
// CHECK-LABEL: @max_pool_i32
// i32 max pool: only checks that the fill identity is -2147483648 (i32 minimum).
func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () {
  // CHECK: constant -2147483648
  // CHECK: linalg.pooling_nhwc_max
  %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi32>)  -> (tensor<1x4x32x62xi32>)
  return
}
1390// -----
1391
// CHECK-LABEL: @avg_pool
// f32 average pool: lowers to a padded linalg.pooling_nhwc_sum followed by a
// linalg.generic that divides each sum by the count of kernel elements that
// fall inside the (unpadded) input for that output position.
func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) {
  // Initial piece computes the sum of the pooling region, with appropriate padding.
  // CHECK: [[CONST:%.+]] = constant 0
  // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
  // CHECK: [[CONST:%.+]] = constant 0
  // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62]
  // CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[POOLINIT]])
  // CHECK: [[KERNEL:%.+]] = linalg.init_tensor [4, 4]
  // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>)
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 33, 62]
  // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] : tensor<1x5x33x62xf32>) outs([[INIT]] : tensor<1x5x33x62xf32>)
  // NOTE: capture regex fixed from "%.0" (matched "%", any char, literal "0")
  // to the intended "%.+" used by every other capture in this file.
  // CHECK:   [[ZERO:%.+]] = constant 0
  // CHECK:   [[ONE:%.+]] = constant 1
  // CHECK:   [[HEIGHT:%.+]] = constant 4
  // CHECK:   [[WIDTH:%.+]] = constant 32
  // CHECK:   [[IDX1:%.+]] = linalg.index 1
  // CHECK:   [[IDX2:%.+]] = linalg.index 2

  // The large block below computes what portion of the kernel is within non-padded input.
  // CHECK:   [[NY:%.+]] = subi [[HEIGHT]], [[IDX1]]
  // CHECK:   [[NX:%.+]] = subi [[WIDTH]], [[IDX2]]
  // CHECK:   [[KH:%.+]] = constant 4
  // CHECK:   [[PAD0:%.+]] = constant 1
  // CHECK:   [[SUBP0:%.+]] = subi [[IDX1]], [[PAD0]]
  // CHECK:   [[P0CMP:%.+]] = cmpi slt, [[SUBP0]], [[ZERO]]
  // CHECK:   [[SELP0:%.+]] = select [[P0CMP]], [[SUBP0]], [[ZERO]]
  // CHECK:   [[ADDP0:%.+]] = addi [[KH]], [[SELP0]]
  // CHECK:   [[PAD1:%.+]] = constant 1
  // CHECK:   [[SUBP1:%.+]] = subi [[NY]], [[PAD1]]
  // CHECK:   [[P1CMP:%.+]] = cmpi slt, [[SUBP1]], [[ZERO]]
  // CHECK:   [[SELP1:%.+]] = select [[P1CMP]], [[SUBP1]], [[ZERO]]
  // CHECK:   [[ADDP1:%.+]] = addi [[ADDP0]], [[SELP1]]
  // CHECK:   [[YCMP:%.+]] = cmpi slt, [[ADDP1]], [[ONE]]
  // CHECK:   [[YSEL:%.+]] = select [[YCMP]], [[ONE]], [[ADDP1]]
  // CHECK:   [[KW:%.+]] = constant 4 : index
  // CHECK:   [[PAD2:%.+]] = constant 1 : index
  // CHECK:   [[SUBP2:%.+]] = subi [[IDX2]], [[PAD2]]
  // CHECK:   [[P2CMP:%.+]] = cmpi slt, [[SUBP2]], [[ZERO]]
  // CHECK:   [[SELP2:%.+]] = select [[P2CMP]], [[SUBP2]], [[ZERO]]
  // CHECK:   [[ADDP2:%.+]] = addi [[KW]], [[SELP2]]
  // CHECK:   [[PAD3:%.+]] = constant 1 : index
  // CHECK:   [[SUBP3:%.+]] = subi [[NX]], [[PAD3]]
  // CHECK:   [[P3CMP:%.+]] = cmpi slt, [[SUBP3]], [[ZERO]]
  // CHECK:   [[SELP3:%.+]] = select [[P3CMP]], [[SUBP3]], [[ZERO]]
  // CHECK:   [[ADDP3:%.+]] = addi [[ADDP2]], [[SELP3]]
  // CHECK:   [[XCMP:%.+]] = cmpi slt, [[ADDP3]], [[ONE]]
  // CHECK:   [[XSEL:%.+]] = select [[XCMP]], [[ONE]], [[ADDP3]]

  // Given the valid coverage of the pooling region, normalize the summation.
  // CHECK:   [[C:%.+]] = muli [[YSEL]], [[XSEL]]
  // CHECK:   [[CI:%.+]] = index_cast [[C]]
  // CHECK:   [[CF:%.+]] = sitofp [[CI]]
  // CHECK:   [[RESULT:%.+]] = divf %arg1, [[CF]]
  // CHECK:   linalg.yield [[RESULT]]
  %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor<1x6x34x62xf32>)  -> (tensor<1x5x33x62xf32>)
  return %0 : tensor<1x5x33x62xf32>
}
1450
1451// -----
1452
// CHECK-LABEL: @avg_pool_i8
// Quantized (i8) average pool: after the nhwc sum pool, the generic region
// removes the input zero-point contribution, rescales via tosa.apply_scale,
// re-applies the output zero point, clamps to [-128, 127], and truncates to i8.
func @avg_pool_i8(%arg0 : tensor<1x128x128x2xi8>) -> () {

  // CHECK: linalg.pooling_nhwc_sum
  // CHECK: linalg.generic

  // CHECK: %[[INZP:.+]] = constant -128
  // CHECK: %[[INZP_OFF:.+]] = muli %{{.+}}, %[[INZP]]
  // CHECK: %[[OFFSETED:.+]] = subi %arg1, %[[INZP_OFF]]
  // CHECK: %[[NUMERATOR:.+]] = constant 1073741825
  // CHECK: %[[MULTIPLIER:.+]] = divi_unsigned %[[NUMERATOR]], %{{.+}}
  // CHECK: %[[SHIFT:.+]] = constant 30
  // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false}
  // CHECK: %[[OUTZP:.+]] = constant -128
  // CHECK: %[[OUT:.+]] = addi %[[SCALE]], %[[OUTZP]]
  // CHECK: %[[MIN:.+]] = constant -128
  // CHECK: %[[MAX:.+]] = constant 127
  // CHECK: %[[CMP_MIN:.+]] = cmpi slt, %[[OUT]], %[[MIN]]
  // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]]
  // CHECK: %[[CMP_MAX:.+]] = cmpi slt, %[[MAX]], %[[OUT]]
  // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]]
  // CHECK: %[[TRUNC:.+]] = trunci %[[CLMP_MAX]]
  // CHECK: linalg.yield %[[TRUNC]]
  %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi8>) -> tensor<1x32x32x2xi8>
  return
}
1479
1480// -----
1481
// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>
// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)>

// CHECK-LABEL: @conv2d_f32
// f32 conv2d: the OHWI weight tensor is transposed to HWIO via a generic
// (MAP0), the convolution runs as linalg.conv_2d_nhwc_hwcf, and the bias is
// broadcast-added (MAP2 over the channel dim) in a trailing generic.
func @conv2d_f32(%input: tensor<1x49x42x27xf32>, %weights: tensor<28x3x3x27xf32>, %bias: tensor<28xf32>) -> () {
  // CHECK: %[[W_IN:.+]] = linalg.init_tensor [3, 3, 27, 28]
  // CHECK: %[[W:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<28x3x3x27xf32>) outs(%[[W_IN]] : tensor<3x3x27x28xf32>)
  // CHECK:   linalg.yield %arg3 : f32
  // CHECK: %[[M_IN:.+]] = linalg.init_tensor [1, 45, 40, 28]
  // CHECK: %[[CST:.+]] = constant 0
  // CHECK: %[[FILL:.+]] = linalg.fill
  // CHECK: %[[B_IN:.+]] = linalg.init_tensor [1, 45, 40, 28]
  // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>)
  // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>)
  // Capture the addf result instead of hardcoding an auto-generated SSA number
  // ("%7"), which breaks whenever unrelated lowering changes shift numbering.
  // CHECK:   %[[ADD:.+]] = addf
  // CHECK:   linalg.yield %[[ADD]] : f32
  %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>)  -> (tensor<1x45x40x28xf32>)
  return
}
1502
1503// -----
1504
// CHECK-LABEL: @conv2d_padded_f32
// Padded f32 conv2d: pad attr [1, 1, 1, 1] becomes an explicit linalg.pad_tensor
// with a 0.0 pad value before the convolution.
func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x28xf32>, %bias: tensor<28xf32>) -> () {
  // CHECK: %[[C0:.+]] = constant 0
  // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
  // CHECK:   linalg.yield %[[C0]]
  // CHECK: linalg.conv_2d_nhwc_hwcf
  %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [1, 1, 1, 1], stride = [1, 1], dilation = [2, 1]} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>)  -> (tensor<1x45x40x28xf32>)
  return
}
1514
1515// -----
1516
// CHECK-LABEL: @conv2d_quant
// Quantized conv2d: padding uses the input zero point (-22) as the pad value,
// and the op lowers to the quantized variant linalg.conv_2d_nhwc_hwcf_q.
func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1xi8>, %arg2 : tensor<1024xi32>) -> () {
  // CHECK:   %[[C22:.+]] = constant -22
  // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
  // CHECK:   linalg.yield %[[C22]]
  // CHECK: linalg.conv_2d_nhwc_hwcf_q
  %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], pad = [1, 1, 1, 1], quantization_info = {input_zp = -22 : i32, weight_zp = 42 : i32}, stride = [1, 1]} : (tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32>
  return
}
1526
1527// -----
1528
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>

// CHECK-LABEL: @depthwise_conv
// f32 depthwise conv: lowers to linalg.depthwise_conv2D_nhwc producing a 5-D
// (N, H, W, C, multiplier) result, which is collapsed back to 4-D before the
// bias is broadcast-added along the channel dimension.
func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () {
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11]
  // CHECK: [[CST0:%.+]] = constant 0
  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
  // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33]
  // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv2D_nhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
  // CHECK: [[COLLAPSED:%.+]] = linalg.tensor_collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
  // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
  // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):  // no predecessors
  // CHECK:   [[ADD:%.+]] = addf %arg3, %arg4 : f32
  // CHECK:   linalg.yield [[ADD]] : f32
  // CHECK: } -> tensor<1x5x5x33xf32>
  %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [1, 1], dilation = [1, 1] } : (tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>)  -> (tensor<1x5x5x33xf32>)
  return
}
1548
1549// -----
1550
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>

// CHECK-LABEL: @depthwise_conv_strides
// Same as @depthwise_conv but with stride = [2, 2], which must surface as
// strides = dense<2> on the linalg.depthwise_conv2D_nhwc op.
func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () {
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11]
  // CHECK: [[CST0:%.+]] = constant 0
  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
  // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33]
  // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv2D_nhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
  // CHECK: [[COLLAPSED:%.+]] = linalg.tensor_collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
  // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
  // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):  // no predecessors
  // CHECK:   [[ADD:%.+]] = addf %arg3, %arg4 : f32
  // CHECK:   linalg.yield [[ADD]] : f32
  // CHECK: } -> tensor<1x5x5x33xf32>
  %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [2, 2], dilation = [1, 1] } : (tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>)  -> (tensor<1x5x5x33xf32>)
  return
}
1570
1571// -----
1572
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>

// CHECK-LABEL: @depthwise_conv_quant
// Quantized depthwise conv: pads with the input zero point (-128) and lowers
// to depthwise_conv2D_nhwc_q, passing the input/weight zero points (-128, 42)
// as extra scalar ins; the 5-D result is collapsed and bias-added as i32.
func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () {
  // CHECK: [[PADV:%.+]] = constant -128
  // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
  // CHECK:   linalg.yield [[PADV]]

  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128]
  // CHECK: [[CST0:%.+]] = constant 0
  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
  // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 12, 12, 512]
  // CHECK: [[C128:%.+]] = constant -128
  // CHECK: [[C42:%.+]] = constant 42
  // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv2D_nhwc_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>)
  // CHECK: [[COLLAPSED:%.+]] = linalg.tensor_collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
  // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) {
  // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32):  // no predecessors
  // CHECK:   [[ADD:%.+]] = addi %arg3, %arg4 : i32
  // CHECK:   linalg.yield [[ADD]] : i32
  // CHECK: } -> tensor<1x12x12x512xi32>
  %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [1, 1, 1, 1], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [1, 1] } : (tensor<1x12x12x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>)  -> tensor<1x12x12x512xi32>
  return
}
1598
1599// -----
1600
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)>
// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>

// CHECK-LABEL: @depthwise_conv_quant_dilations
// Quantized depthwise conv with dilation = [2, 2] and no padding: the dilation
// must surface as dilations = dense<2> on depthwise_conv2D_nhwc_q.
func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () {
  // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 10, 10, 4, 128]
  // CHECK: [[CST0:%.+]] = constant 0
  // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
  // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 10, 10, 512]
  // CHECK: [[C128:%.+]] = constant -128
  // CHECK: [[C42:%.+]] = constant 42
  // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv2D_nhwc_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>)
  // CHECK: [[COLLAPSED:%.+]] = linalg.tensor_collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
  // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) {
  // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32):  // no predecessors
  // CHECK:   [[ADD:%.+]] = addi %arg3, %arg4 : i32
  // CHECK:   linalg.yield [[ADD]] : i32
  // CHECK: } -> tensor<1x10x10x512xi32>
  %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [2, 2] } : (tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>)  -> tensor<1x10x10x512xi32>
  return
}
1622
1623// -----
1624
// CHECK-LABEL: @transpose_conv
// Stride-1 transpose conv lowers to a forward conv over an input padded by
// (kernel - 1) = 2 on each spatial edge.
func @transpose_conv(%arg0 : tensor<1x12x12x2xf32>, %arg1 : tensor<4x3x3x2xf32>, %arg2 : tensor<4xf32>) -> () {
  // CHECK: linalg.pad_tensor %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0]
  // CHECK: linalg.conv_2d_nhwc_hwcf
  %0 = "tosa.transpose_conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], out_pad = [0, 0], out_shape = [1, 14, 14, 4], stride = [1, 1]} : (tensor<1x12x12x2xf32>, tensor<4x3x3x2xf32>, tensor<4xf32>) -> tensor<1x14x14x4xf32>
  return
}
1632
1633// -----
1634
// CHECK-LABEL: @transpose_conv_dilated
// Dilated (2x2) transpose conv: the edge padding grows to 4 per side and the
// dilation is carried onto the forward linalg.conv_2d_nhwc_hwcf.
func @transpose_conv_dilated(%arg0 : tensor<1x12x12x2xf32>, %arg1 : tensor<4x3x3x2xf32>, %arg2 : tensor<4xf32>) -> () {
  // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 4, 4, 0] high[0, 4, 4, 0]
  // CHECK: linalg.conv_2d_nhwc_hwcf {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], {{%.+}} : tensor<1x20x20x2xf32>, tensor<3x3x2x4xf32>)
  %0 = "tosa.transpose_conv2d"(%arg0, %arg1, %arg2) {dilation = [2, 2], out_pad = [0, 0], out_shape = [1, 16, 16, 4], stride = [1, 1]} : (tensor<1x12x12x2xf32>, tensor<4x3x3x2xf32>, tensor<4xf32>) -> tensor<1x16x16x4xf32>
  return
}
1642
1643
1644// -----
1645
// CHECK-LABEL: @resize_nearest
// Floating-point NEAREST_NEIGHBOR resize: each output index is mapped into the
// input via stride_fp/offset_fp, split into integer and fractional parts,
// rounded to the nearest index, clamped to the input bounds, and used for a
// single tensor.extract.
func @resize_nearest(%input: tensor<1x2x2x1xf32>) -> () {
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1]
  // CHECK: %[[GENERIC:.+]] = linalg.generic
  // CHECK: %[[IDX0:.+]] = linalg.index 0
  // CHECK: %[[IDX1:.+]] = linalg.index 1
  // CHECK: %[[IDX2:.+]] = linalg.index 2
  // CHECK: %[[IDX3:.+]] = linalg.index 3
  // CHECK-DAG: %[[XYMIN:.+]] = constant 0
  // CHECK-DAG: %[[YMAX:.+]] = constant 1
  // CHECK-DAG: %[[XMAX:.+]] = constant 1
  // CHECK-DAG: %[[Y:.+]] = index_cast %[[IDX1]]
  // CHECK-DAG: %[[X:.+]] = index_cast %[[IDX2]]
  // CHECK-DAG: %[[STRIDEY:.+]] = constant 5.000000e-01
  // CHECK-DAG: %[[STRIDEX:.+]] = constant 5.000000e-01
  // CHECK-DAG: %[[OFFSETY:.+]] = constant 1.000000e-01
  // CHECK-DAG: %[[OFFSETX:.+]] = constant 2.000000e-01
  // CHECK-DAG: %[[VAL4:.+]] = uitofp %[[Y]]
  // CHECK-DAG: %[[VAL5:.+]] = uitofp %[[X]]
  // CHECK-DAG: %[[VAL6:.+]] = mulf %[[VAL4]], %[[STRIDEY]]
  // CHECK-DAG: %[[VAL7:.+]] = mulf %[[VAL5]], %[[STRIDEX]]
  // CHECK-DAG: %[[VAL8:.+]] = addf %[[VAL6]], %[[OFFSETY]]
  // CHECK-DAG: %[[VAL9:.+]] = addf %[[VAL7]], %[[OFFSETX]]

  // Find the remainder and integer component of the target index.

  // CHECK-DAG: %[[VAL10:.+]] = floorf %[[VAL8]]
  // CHECK-DAG: %[[VAL11:.+]] = floorf %[[VAL9]]
  // CHECK-DAG: %[[VAL12:.+]] = subf %[[VAL8]], %[[VAL10]]
  // CHECK-DAG: %[[VAL13:.+]] = subf %[[VAL9]], %[[VAL11]]
  // CHECK-DAG: %[[VAL14:.+]] = fptosi %[[VAL10]]
  // CHECK-DAG: %[[VAL15:.+]] = fptosi %[[VAL11]]

  // Round to the nearest index.

  // CHECK-DAG: %[[ROUND:.+]] = constant 5.000000e-01
  // CHECK-DAG: %[[VAL16:.+]] = cmpf oge, %[[VAL12]], %[[ROUND]]
  // CHECK-DAG: %[[VAL17:.+]] = cmpf oge, %[[VAL13]], %[[ROUND]]
  // CHECK-DAG: %[[ZERO:.+]] = constant 0
  // CHECK-DAG: %[[ONE:.+]] = constant 1
  // CHECK-DAG: %[[VAL18:.+]] = select %[[VAL16]], %[[ONE]], %[[ZERO]]
  // CHECK-DAG: %[[VAL19:.+]] = select %[[VAL17]], %[[ONE]], %[[ZERO]]
  // CHECK-DAG: %[[VAL20:.+]] = addi %[[VAL14]], %[[VAL18]]
  // CHECK-DAG: %[[VAL21:.+]] = addi %[[VAL15]], %[[VAL19]]

  // This section applies bound checking to be within the input image.

  // CHECK-DAG: %[[VAL22:.+]] = cmpi slt, %[[VAL20]], %[[XYMIN]]
  // CHECK-DAG: %[[VAL23:.+]] = select %[[VAL22]], %[[XYMIN]], %[[VAL20]]
  // CHECK-DAG: %[[VAL24:.+]] = cmpi slt, %[[YMAX]], %[[VAL20]]
  // CHECK-DAG: %[[VAL25:.+]] = select %[[VAL24]], %[[YMAX]], %[[VAL23]]
  // CHECK-DAG: %[[VAL26:.+]] = cmpi slt, %[[VAL21]], %[[XYMIN]]
  // CHECK-DAG: %[[VAL27:.+]] = select %[[VAL26]], %[[XYMIN]], %[[VAL21]]
  // CHECK-DAG: %[[VAL28:.+]] = cmpi slt, %[[XMAX]], %[[VAL21]]
  // CHECK-DAG: %[[VAL29:.+]] = select %[[VAL28]], %[[XMAX]], %[[VAL27]]

  // Extract the nearest value using the computed indices.

  // CHECK-DAG: %[[IDY:.+]] = index_cast %[[VAL25]]
  // CHECK-DAG: %[[IDX:.+]] = index_cast %[[VAL29]]
  // CHECK-DAG: %[[EXTRACT:.+]] = tensor.extract %arg0[%[[IDX0]], %[[IDY]], %[[IDX]], %[[IDX3]]]
  // CHECK: linalg.yield %[[EXTRACT]]
  %output = "tosa.resize"(%input) { output_size = [4, 4], stride = [0, 0], offset = [0, 0], stride_fp = [0.5 : f32, 0.5 : f32], offset_fp = [0.1 : f32, 0.2 : f32], shift = 0 : i32, mode = "NEAREST_NEIGHBOR" } : (tensor<1x2x2x1xf32>)  -> (tensor<1x4x4x1xf32>)

  return
}
1712
1713// -----
1714
// CHECK-LABEL: @resize_bilinear
// Floating-point BILINEAR resize: computes floor indices plus fractional
// weights (DY, DX), clamps the four neighbor coordinates, extracts the four
// corners, and blends them with (1 - d) / d weights along x then y.
func @resize_bilinear(%input: tensor<1x2x2x1xf32>) -> () {
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1]
  // CHECK: %[[GENERIC:.+]] = linalg.generic
  // CHECK: %[[IDX0:.+]] = linalg.index 0
  // CHECK: %[[IDX1:.+]] = linalg.index 1
  // CHECK: %[[IDX2:.+]] = linalg.index 2
  // CHECK: %[[IDX3:.+]] = linalg.index 3
  // CHECK: %[[XYMIN:.+]] = constant 0
  // CHECK: %[[YMAX:.+]] = constant 1
  // CHECK: %[[XMAX:.+]] = constant 1

  // CHECK: %[[VAL10:.+]] = floorf %[[VAL8:.+]]
  // CHECK: %[[VAL11:.+]] = floorf %[[VAL9:.+]]

  // CHECK: %[[DY:.+]] = subf %[[VAL8:.+]], %[[VAL10]]
  // CHECK: %[[DX:.+]] = subf %[[VAL9:.+]], %[[VAL11]]

  // CHECK: %[[Y0:.+]] = fptosi %[[VAL10]]
  // CHECK: %[[X0:.+]] = fptosi %[[VAL11]]

  // Compute the left, right, and top indices for the bilinear interpolation.

  // CHECK: %[[ONE:.+]] = constant 1
  // CHECK: %[[Y1:.+]] = addi %[[Y0]], %[[ONE]]
  // CHECK: %[[X1:.+]] = addi %[[X0]], %[[ONE]]

  // Bound check each dimension.

  // CHECK: %[[PRED:.+]] = cmpi slt, %[[Y0]], %[[XYMIN]]
  // CHECK: %[[BOUND:.+]] = select %[[PRED]], %[[XYMIN]], %[[Y0]]
  // CHECK: %[[PRED:.+]] = cmpi slt, %[[YMAX]], %[[Y0]]
  // CHECK: %[[YLO:.+]] = select %[[PRED]], %[[YMAX]], %[[BOUND]]

  // CHECK: %[[PRED:.+]] = cmpi slt, %[[Y1]], %[[XYMIN]]
  // CHECK: %[[BOUND:.+]] = select %[[PRED]], %[[XYMIN]], %[[Y1]]
  // CHECK: %[[PRED:.+]] = cmpi slt, %[[YMAX]], %[[Y1]]
  // CHECK: %[[YHI:.+]] = select %[[PRED]], %[[YMAX]], %[[BOUND]]

  // CHECK: %[[PRED:.+]] = cmpi slt, %[[X0]], %[[XYMIN]]
  // CHECK: %[[BOUND:.+]] = select %[[PRED]], %[[XYMIN]], %[[X0]]
  // CHECK: %[[PRED:.+]] = cmpi slt, %[[XMAX]], %[[X0]]
  // CHECK: %[[XLO:.+]] = select %[[PRED]], %[[XMAX]], %[[BOUND]]

  // CHECK: %[[PRED:.+]] = cmpi slt, %[[X1]], %[[XYMIN]]
  // CHECK: %[[BOUND:.+]] = select %[[PRED]], %[[XYMIN]], %[[X1]]
  // CHECK: %[[PRED:.+]] = cmpi slt, %[[XMAX]], %[[X1]]
  // CHECK: %[[XHI:.+]] = select %[[PRED]], %[[XMAX]], %[[BOUND]]

  // Extract each corner of the bilinear interpolation.

  // CHECK: %[[YLOI:.+]] = index_cast %[[YLO]]
  // CHECK: %[[YHII:.+]] = index_cast %[[YHI]]
  // CHECK: %[[XLOI:.+]] = index_cast %[[XLO]]
  // CHECK: %[[XHII:.+]] = index_cast %[[XHI]]

  // CHECK: %[[LOLO:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YLOI]], %[[XLOI]], %[[IDX3]]]
  // CHECK: %[[LOHI:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YLOI]], %[[XHII]], %[[IDX3]]]
  // CHECK: %[[HILO:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YHII]], %[[XLOI]], %[[IDX3]]]
  // CHECK: %[[HIHI:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YHII]], %[[XHII]], %[[IDX3]]]

  // Compute the bilinear interpolation.

  // CHECK: %[[ONE:.+]] = constant 1.000000e+00
  // CHECK: %[[NDX:.+]] = subf %[[ONE]], %[[DX]]
  // CHECK: %[[WLOLO:.+]] = mulf %[[LOLO]], %[[NDX]]
  // CHECK: %[[WLOHI:.+]] = mulf %[[LOHI]], %[[DX]]
  // CHECK: %[[LO:.+]] = addf %[[WLOLO]], %[[WLOHI]]
  // CHECK: %[[WHILO:.+]] = mulf %[[HILO]], %[[NDX]]
  // CHECK: %[[WHIHI:.+]] = mulf %[[HIHI]], %[[DX]]
  // CHECK: %[[HI:.+]] = addf %[[WHILO]], %[[WHIHI]]
  // CHECK: %[[NDY:.+]] = subf %[[ONE]], %[[DY]]
  // CHECK: %[[WLO:.+]] = mulf %[[LO]], %[[NDY]]
  // CHECK: %[[WHI:.+]] = mulf %[[HI]], %[[DY]]
  // CHECK: %[[RESULT:.+]] = addf %[[WLO]], %[[WHI]]
  // CHECK: linalg.yield %[[RESULT]]
  %output = "tosa.resize"(%input) { output_size = [4, 4], stride = [0, 0], offset = [0, 0], stride_fp = [0.5 : f32, 0.5 : f32], offset_fp = [0.1 : f32, 0.2 : f32], shift = 0 : i32, mode = "BILINEAR" } : (tensor<1x2x2x1xf32>)  -> (tensor<1x4x4x1xf32>)
  return
}
1794
1795// -----
1796
// CHECK-LABEL: @resize_nearest_int
// Integer NEAREST_NEIGHBOR resize (shift = 8): fixed-point arithmetic replaces
// the float path — the target index is computed via muli/addi, split with
// arithmetic shifts by 8, rounded against 128 (i.e. 0.5 in Q8), clamped, and
// extracted.
func @resize_nearest_int(%input: tensor<1x2x2x1xi32>) -> () {
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1]
  // CHECK: %[[GENERIC:.+]] = linalg.generic
  // CHECK: %[[IDX0:.+]] = linalg.index 0
  // CHECK: %[[IDX1:.+]] = linalg.index 1
  // CHECK: %[[IDX2:.+]] = linalg.index 2
  // CHECK: %[[IDX3:.+]] = linalg.index 3
  // CHECK-DAG: %[[XYMIN:.+]] = constant 0
  // CHECK-DAG: %[[YMAX:.+]] = constant 1
  // CHECK-DAG: %[[XMAX:.+]] = constant 1
  // CHECK-DAG: %[[Y:.+]] = index_cast %[[IDX1]]
  // CHECK-DAG: %[[X:.+]] = index_cast %[[IDX2]]
  // CHECK-DAG: %[[STRIDEY:.+]] = constant 128
  // CHECK-DAG: %[[STRIDEX:.+]] = constant 128
  // CHECK-DAG: %[[OFFSETY:.+]] = constant 1
  // CHECK-DAG: %[[OFFSETX:.+]] = constant 2
  // CHECK-DAG: %[[EIGHT:.+]] = constant 8
  // CHECK-DAG: %[[VAL4:.+]] = muli %[[Y]], %[[STRIDEY]]
  // CHECK-DAG: %[[VAL5:.+]] = muli %[[X]], %[[STRIDEX]]
  // CHECK-DAG: %[[VAL6:.+]] = addi %[[VAL4]], %[[OFFSETY]]
  // CHECK-DAG: %[[VAL7:.+]] = addi %[[VAL5]], %[[OFFSETX]]

  // Find the remainder and integer component of the target index.


  // CHECK-DAG: %[[VAL8:.+]] = shift_right_signed %[[VAL6]], %[[EIGHT]]
  // CHECK-DAG: %[[VAL9:.+]] = shift_right_signed %[[VAL7]], %[[EIGHT]]
  // CHECK-DAG: %[[VAL10:.+]] = shift_left %[[VAL8]], %[[EIGHT]]
  // CHECK-DAG: %[[VAL11:.+]] = shift_left %[[VAL9]], %[[EIGHT]]
  // CHECK-DAG: %[[VAL12:.+]] = subi %[[VAL6]], %[[VAL10]]
  // CHECK-DAG: %[[VAL13:.+]] = subi %[[VAL7]], %[[VAL11]]

  // Round to the nearest index.

  // CHECK-DAG: %[[ROUND:.+]] = constant 128
  // CHECK-DAG: %[[VAL16:.+]] = cmpi sge, %[[VAL12]], %[[ROUND]]
  // CHECK-DAG: %[[VAL17:.+]] = cmpi sge, %[[VAL13]], %[[ROUND]]
  // CHECK-DAG: %[[ZERO:.+]] = constant 0
  // CHECK-DAG: %[[ONE:.+]] = constant 1
  // CHECK-DAG: %[[VAL18:.+]] = select %[[VAL16]], %[[ONE]], %[[ZERO]]
  // CHECK-DAG: %[[VAL19:.+]] = select %[[VAL17]], %[[ONE]], %[[ZERO]]
  // CHECK-DAG: %[[VAL20:.+]] = addi %[[VAL8]], %[[VAL18]]
  // CHECK-DAG: %[[VAL21:.+]] = addi %[[VAL9]], %[[VAL19]]

  // This section applies bound checking to be within the input image.

  // CHECK-DAG: %[[VAL22:.+]] = cmpi slt, %[[VAL20]], %[[XYMIN]]
  // CHECK-DAG: %[[VAL23:.+]] = select %[[VAL22]], %[[XYMIN]], %[[VAL20]]
  // CHECK-DAG: %[[VAL24:.+]] = cmpi slt, %[[YMAX]], %[[VAL20]]
  // CHECK-DAG: %[[VAL25:.+]] = select %[[VAL24]], %[[YMAX]], %[[VAL23]]
  // CHECK-DAG: %[[VAL26:.+]] = cmpi slt, %[[VAL21]], %[[XYMIN]]
  // CHECK-DAG: %[[VAL27:.+]] = select %[[VAL26]], %[[XYMIN]], %[[VAL21]]
  // CHECK-DAG: %[[VAL28:.+]] = cmpi slt, %[[XMAX]], %[[VAL21]]
  // CHECK-DAG: %[[VAL29:.+]] = select %[[VAL28]], %[[XMAX]], %[[VAL27]]

  // Extract the nearest value using the computed indices.

  // CHECK-DAG: %[[IDY:.+]] = index_cast %[[VAL25]]
  // CHECK-DAG: %[[IDX:.+]] = index_cast %[[VAL29]]
  // CHECK: %[[EXTRACT:.+]] = tensor.extract %arg0[%[[IDX0]], %[[IDY]], %[[IDX]], %[[IDX3]]]
  // CHECK: linalg.yield %[[EXTRACT]]
  %output = "tosa.resize"(%input) { output_size = [4, 4], stride = [128, 128], offset = [1, 2], stride_fp = [0. : f32, 0. : f32], offset_fp = [0. : f32, 0. : f32], shift = 8 : i32, mode = "NEAREST_NEIGHBOR" } : (tensor<1x2x2x1xi32>)  -> (tensor<1x4x4x1xi32>)
  return
}
1862
1863// -----
1864
// CHECK-LABEL: @resize_bilinear_int
// Integer BILINEAR resize (shift = 8): fixed-point variant of the bilinear
// path. Corner values are sign-extended to i32 and blended with integer
// weights DX/DY against the fixed-point scale 256.
func @resize_bilinear_int(%input: tensor<1x2x2x1xi8>) -> () {
  // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1]
  // CHECK: %[[GENERIC:.+]] = linalg.generic

  // CHECK: %[[IDX0:.+]] = linalg.index 0
  // CHECK: %[[IDX3:.+]] = linalg.index 3

  // CHECK: %[[XYMIN:.+]] = constant 0
  // CHECK: %[[YMAX:.+]] = constant 1
  // CHECK: %[[XMAX:.+]] = constant 1

  // CHECK: %[[Y0:.+]] = shift_right_signed
  // CHECK: %[[X0:.+]] = shift_right_signed
  // CHECK: %[[ROUNDY:.+]] = shift_left %[[Y0]]
  // CHECK: %[[ROUNDX:.+]] = shift_left %[[X0]]
  // The minuend is the pre-shift fixed-point index; match it with a wildcard
  // rather than a hardcoded auto-generated SSA number ("%10"/"%11"), which
  // breaks whenever unrelated lowering changes shift the numbering.
  // CHECK: %[[DY:.+]] = subi %{{.+}}, %[[ROUNDY]]
  // CHECK: %[[DX:.+]] = subi %{{.+}}, %[[ROUNDX]]

  // Compute the left, right, and top indices for the bilinear interpolation.

  // CHECK: %[[ONE:.+]] = constant 1
  // CHECK: %[[Y1:.+]] = addi %[[Y0]], %[[ONE]]
  // CHECK: %[[X1:.+]] = addi %[[X0]], %[[ONE]]

  // Bound check each dimension.

  // CHECK: %[[PRED:.+]] = cmpi slt, %[[Y0]], %[[XYMIN]]
  // CHECK: %[[BOUND:.+]] = select %[[PRED]], %[[XYMIN]], %[[Y0]]
  // CHECK: %[[PRED:.+]] = cmpi slt, %[[YMAX]], %[[Y0]]
  // CHECK: %[[YLO:.+]] = select %[[PRED]], %[[YMAX]], %[[BOUND]]

  // CHECK: %[[PRED:.+]] = cmpi slt, %[[Y1]], %[[XYMIN]]
  // CHECK: %[[BOUND:.+]] = select %[[PRED]], %[[XYMIN]], %[[Y1]]
  // CHECK: %[[PRED:.+]] = cmpi slt, %[[YMAX]], %[[Y1]]
  // CHECK: %[[YHI:.+]] = select %[[PRED]], %[[YMAX]], %[[BOUND]]

  // CHECK: %[[PRED:.+]] = cmpi slt, %[[X0]], %[[XYMIN]]
  // CHECK: %[[BOUND:.+]] = select %[[PRED]], %[[XYMIN]], %[[X0]]
  // CHECK: %[[PRED:.+]] = cmpi slt, %[[XMAX]], %[[X0]]
  // CHECK: %[[XLO:.+]] = select %[[PRED]], %[[XMAX]], %[[BOUND]]

  // CHECK: %[[PRED:.+]] = cmpi slt, %[[X1]], %[[XYMIN]]
  // CHECK: %[[BOUND:.+]] = select %[[PRED]], %[[XYMIN]], %[[X1]]
  // CHECK: %[[PRED:.+]] = cmpi slt, %[[XMAX]], %[[X1]]
  // CHECK: %[[XHI:.+]] = select %[[PRED]], %[[XMAX]], %[[BOUND]]

  // Extract each corner of the bilinear interpolation.

  // CHECK: %[[YLOI:.+]] = index_cast %[[YLO]]
  // CHECK: %[[YHII:.+]] = index_cast %[[YHI]]
  // CHECK: %[[XLOI:.+]] = index_cast %[[XLO]]
  // CHECK: %[[XHII:.+]] = index_cast %[[XHI]]

  // CHECK: %[[LOLO:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YLOI]], %[[XLOI]], %[[IDX3]]]
  // CHECK: %[[LOHI:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YLOI]], %[[XHII]], %[[IDX3]]]
  // CHECK: %[[HILO:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YHII]], %[[XLOI]], %[[IDX3]]]
  // CHECK: %[[HIHI:.+]] = tensor.extract %arg0[%[[IDX0]], %[[YHII]], %[[XHII]], %[[IDX3]]]

  // CHECK: %[[XLOLO:.+]] = sexti %[[LOLO]]
  // CHECK: %[[XLOHI:.+]] = sexti %[[LOHI]]
  // CHECK: %[[XHILO:.+]] = sexti %[[HILO]]
  // CHECK: %[[XHIHI:.+]] = sexti %[[HIHI]]

  // Compute the bilinear interpolation.

  // CHECK: %[[SCALE:.+]] = constant 256
  // CHECK: %[[NDX:.+]] = subi %[[SCALE]], %[[DX]]
  // CHECK: %[[WLOLO:.+]] = muli %[[XLOLO]], %[[NDX]]
  // CHECK: %[[WLOHI:.+]] = muli %[[XLOHI]], %[[DX]]
  // CHECK: %[[LO:.+]] = addi %[[WLOLO]], %[[WLOHI]]
  // CHECK: %[[WHILO:.+]] = muli %[[XHILO]], %[[NDX]]
  // CHECK: %[[WHIHI:.+]] = muli %[[XHIHI]], %[[DX]]
  // CHECK: %[[HI:.+]] = addi %[[WHILO]], %[[WHIHI]]
  // CHECK: %[[NDY:.+]] = subi %[[SCALE]], %[[DY]]
  // CHECK: %[[WLO:.+]] = muli %[[LO]], %[[NDY]]
  // CHECK: %[[WHI:.+]] = muli %[[HI]], %[[DY]]
  // CHECK: %[[RESULT:.+]] = addi %[[WLO]], %[[WHI]]
  // CHECK: linalg.yield %[[RESULT]]
  %output = "tosa.resize"(%input) { output_size = [4, 4], stride = [128, 128], offset = [1, 2], stride_fp = [0. : f32, 0. : f32], offset_fp = [0. : f32, 0. : f32], shift = 8 : i32, mode = "BILINEAR" } : (tensor<1x2x2x1xi8>)  -> (tensor<1x4x4x1xi32>)
  return
}
1947